From: Robert Richter <rrichter@amd.com>
Now, that the Component Register mappings are stored, use them to
enable and map the HDM decoder capabilities. The Component Registers
do not need to be probed again for this, remove probing code.
The HDM capability applies to Endpoints, USPs and VH Host Bridges. The
Endpoint's component register mappings are located in the cxlds and
else in the port's structure. Provide a helper function
cxl_port_get_comp_map() to locate the mappings depending on the
component's type.
Signed-off-by: Robert Richter <rrichter@amd.com>
Signed-off-by: Terry Bowman <terry.bowman@amd.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
---
drivers/cxl/core/hdm.c | 64 +++++++++++++++++++++++-------------------
1 file changed, 35 insertions(+), 29 deletions(-)
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 4449b34a80cc..b0f59e63e0d2 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -81,26 +81,6 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
cxlhdm->interleave_mask |= GENMASK(14, 12);
}
-static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
- struct cxl_component_regs *regs)
-{
- struct cxl_register_map map = {
- .dev = &port->dev,
- .resource = port->component_reg_phys,
- .base = crb,
- .max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
- };
-
- cxl_probe_component_regs(&port->dev, crb, &map.component_map);
- if (!map.component_map.hdm_decoder.valid) {
- dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
- /* unique error code to indicate no HDM decoder capability */
- return -ENODEV;
- }
-
- return cxl_map_component_regs(&map, regs, BIT(CXL_CM_CAP_CAP_ID_HDM));
-}
-
static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
{
struct cxl_hdm *cxlhdm;
@@ -145,6 +125,22 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
return true;
}
+static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
+{
+ /*
+ * HDM capability applies to Endpoints, USPs and VH Host
+ * Bridges. The Endpoint's component register mappings are
+ * located in the cxlds.
+ */
+ if (is_cxl_endpoint(port)) {
+ struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
+
+ return &memdev->cxlds->comp_map;
+ }
+
+ return &port->comp_map;
+}
+
/**
* devm_cxl_setup_hdm - map HDM decoder component registers
* @port: cxl_port to map
@@ -155,7 +151,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
{
struct device *dev = &port->dev;
struct cxl_hdm *cxlhdm;
- void __iomem *crb;
+ struct cxl_register_map *comp_map;
int rc;
cxlhdm = devm_kzalloc(dev, sizeof(*cxlhdm), GFP_KERNEL);
@@ -164,19 +160,29 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
cxlhdm->port = port;
dev_set_drvdata(dev, cxlhdm);
- crb = ioremap(port->component_reg_phys, CXL_COMPONENT_REG_BLOCK_SIZE);
- if (!crb && info && info->mem_enabled) {
- cxlhdm->decoder_count = info->ranges;
- return cxlhdm;
- } else if (!crb) {
+ comp_map = cxl_port_get_comp_map(port);
+
+ if (comp_map->resource == CXL_RESOURCE_NONE) {
+ if (info && info->mem_enabled) {
+ cxlhdm->decoder_count = info->ranges;
+ return cxlhdm;
+ }
dev_err(dev, "No component registers mapped\n");
return ERR_PTR(-ENXIO);
}
- rc = map_hdm_decoder_regs(port, crb, &cxlhdm->regs);
- iounmap(crb);
- if (rc)
+ if (!comp_map->component_map.hdm_decoder.valid) {
+ dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
+ /* unique error code to indicate no HDM decoder capability */
+ return ERR_PTR(-ENODEV);
+ }
+
+ rc = cxl_map_component_regs(comp_map, &cxlhdm->regs,
+ BIT(CXL_CM_CAP_CAP_ID_HDM));
+ if (rc) {
+ dev_dbg(dev, "Failed to map HDM capability.\n");
return ERR_PTR(rc);
+ }
parse_hdm_decoder_caps(cxlhdm);
if (cxlhdm->decoder_count == 0) {
--
2.34.1
Terry Bowman wrote:
> From: Robert Richter <rrichter@amd.com>
>
> Now, that the Component Register mappings are stored, use them to
> enable and map the HDM decoder capabilities. The Component Registers
> do not need to be probed again for this, remove probing code.
>
> The HDM capability applies to Endpoints, USPs and VH Host Bridges. The
> Endpoint's component register mappings are located in the cxlds and
> else in the port's structure. Provide a helper function
> cxl_port_get_comp_map() to locate the mappings depending on the
> component's type.
This causes a regression that cxl_test tripped over. It's likely
something you could reproduce without cxl_test just be reloading the
cxl_port driver. Root cause below, but here is what I see on a test run:
# meson test -C build --suite cxl
ninja: no work to do.
ninja: Entering directory `/root/git/ndctl/build'
[113/113] Linking target ndctl/ndctl
1/6 ndctl:cxl / cxl-topology.sh OK 12.78s
2/6 ndctl:cxl / cxl-region-sysfs.sh OK 7.72s
3/6 ndctl:cxl / cxl-labels.sh FAIL 1.53s (exit status 129 or signal 1 SIGHUP)
>>> LD_LIBRARY_PATH=/root/git/ndctl/build/cxl/lib:/root/git/ndctl/build/ndctl/lib:/root/git/ndctl/build/daxctl/lib NDCTL=/root/git/ndctl/build/ndctl/ndctl DAXCTL=/root/git/ndctl/build/daxctl/daxctl MALLOC_PERTURB_=67 TEST_PATH=/root/git/ndctl/build/test DATA_PATH=/root/git/ndctl/test /bin/bash /root/git/ndctl/test/cxl-labels.sh
4/6 ndctl:cxl / cxl-create-region.sh OK 17.05s
5/6 ndctl:cxl / cxl-xor-region.sh OK 5.18s
6/6 ndctl:cxl / cxl-security.sh OK 4.68s
Ok: 5
Expected Fail: 0
Fail: 1
Unexpected Pass: 0
Skipped: 0
Timeout: 0
Full log written to /root/git/ndctl/build/meson-logs/testlog.txt
It's not that cxl-labels.sh causes the error, it is loading and
unloading the port driver trips over the problem below:
>
> Signed-off-by: Robert Richter <rrichter@amd.com>
> Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> ---
> drivers/cxl/core/hdm.c | 64 +++++++++++++++++++++++-------------------
> 1 file changed, 35 insertions(+), 29 deletions(-)
>
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 4449b34a80cc..b0f59e63e0d2 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -81,26 +81,6 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
> cxlhdm->interleave_mask |= GENMASK(14, 12);
> }
>
> -static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
> - struct cxl_component_regs *regs)
> -{
> - struct cxl_register_map map = {
> - .dev = &port->dev,
> - .resource = port->component_reg_phys,
> - .base = crb,
> - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
> - };
> -
> - cxl_probe_component_regs(&port->dev, crb, &map.component_map);
> - if (!map.component_map.hdm_decoder.valid) {
> - dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
> - /* unique error code to indicate no HDM decoder capability */
> - return -ENODEV;
> - }
> -
> - return cxl_map_component_regs(&map, regs, BIT(CXL_CM_CAP_CAP_ID_HDM));
> -}
> -
> static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> {
> struct cxl_hdm *cxlhdm;
> @@ -145,6 +125,22 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> return true;
> }
>
> +static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
> +{
> + /*
> + * HDM capability applies to Endpoints, USPs and VH Host
> + * Bridges. The Endpoint's component register mappings are
> + * located in the cxlds.
> + */
> + if (is_cxl_endpoint(port)) {
> + struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
> +
> + return &memdev->cxlds->comp_map;
...but why? The issue here is that the @dev argument in that map is the
memdev parent PCI device. However, in this context the @dev for devm
operations wants to be &port->dev.
> + }
> +
> + return &port->comp_map;
...so this is fine, and folding in the following resolves the test
failure.
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index b0f59e63e0d2..6f111f487795 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -125,22 +125,6 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
return true;
}
-static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
-{
- /*
- * HDM capability applies to Endpoints, USPs and VH Host
- * Bridges. The Endpoint's component register mappings are
- * located in the cxlds.
- */
- if (is_cxl_endpoint(port)) {
- struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
-
- return &memdev->cxlds->comp_map;
- }
-
- return &port->comp_map;
-}
-
/**
* devm_cxl_setup_hdm - map HDM decoder component registers
* @port: cxl_port to map
@@ -160,8 +144,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
cxlhdm->port = port;
dev_set_drvdata(dev, cxlhdm);
- comp_map = cxl_port_get_comp_map(port);
-
+ comp_map = &port->comp_map;
if (comp_map->resource == CXL_RESOURCE_NONE) {
if (info && info->mem_enabled) {
cxlhdm->decoder_count = info->ranges;
Am I missing why the cxlds->comp_map needs to be reused?
Note that I am out and may not be able to respond further until next
week.
Dan,
On 03.07.23 17:39:38, Dan Williams wrote:
> Terry Bowman wrote:
> > From: Robert Richter <rrichter@amd.com>
> >
> > Now, that the Component Register mappings are stored, use them to
> > enable and map the HDM decoder capabilities. The Component Registers
> > do not need to be probed again for this, remove probing code.
> >
> > The HDM capability applies to Endpoints, USPs and VH Host Bridges. The
> > Endpoint's component register mappings are located in the cxlds and
> > else in the port's structure. Provide a helper function
> > cxl_port_get_comp_map() to locate the mappings depending on the
> > component's type.
>
> This causes a regression that cxl_test tripped over. It's likely
> something you could reproduce without cxl_test just be reloading the
> cxl_port driver. Root cause below, but here is what I see on a test run:
>
> # meson test -C build --suite cxl
> ninja: no work to do.
> ninja: Entering directory `/root/git/ndctl/build'
> [113/113] Linking target ndctl/ndctl
> 1/6 ndctl:cxl / cxl-topology.sh OK 12.78s
> 2/6 ndctl:cxl / cxl-region-sysfs.sh OK 7.72s
> 3/6 ndctl:cxl / cxl-labels.sh FAIL 1.53s (exit status 129 or signal 1 SIGHUP)
> >>> LD_LIBRARY_PATH=/root/git/ndctl/build/cxl/lib:/root/git/ndctl/build/ndctl/lib:/root/git/ndctl/build/daxctl/lib NDCTL=/root/git/ndctl/build/ndctl/ndctl DAXCTL=/root/git/ndctl/build/daxctl/daxctl MALLOC_PERTURB_=67 TEST_PATH=/root/git/ndctl/build/test DATA_PATH=/root/git/ndctl/test /bin/bash /root/git/ndctl/test/cxl-labels.sh
I have tried various combinations of revisions and setups (patch #3,
#5 and #14, ndctl:pending/v77, cxl_test only with both, "ACPI0017" and
cxl_pci_probe() disabled), but could not reproduce this. Tests always
pass for me:
[51/51] Linking target ndctl/ndctl
1/6 ndctl:cxl / cxl-topology.sh OK 18.59s
2/6 ndctl:cxl / cxl-region-sysfs.sh OK 12.26s
3/6 ndctl:cxl / cxl-labels.sh OK 28.25s
4/6 ndctl:cxl / cxl-create-region.sh OK 19.56s
5/6 ndctl:cxl / cxl-xor-region.sh OK 10.57s
6/6 ndctl:cxl / cxl-security.sh OK 9.77s
Ok: 6
Expected Fail: 0
Fail: 0
Unexpected Pass: 0
Skipped: 0
Timeout: 0
Full log written to /root/ndctl/build/meson-logs/testlog.txt
>
> 4/6 ndctl:cxl / cxl-create-region.sh OK 17.05s
> 5/6 ndctl:cxl / cxl-xor-region.sh OK 5.18s
> 6/6 ndctl:cxl / cxl-security.sh OK 4.68s
>
> Ok: 5
> Expected Fail: 0
> Fail: 1
> Unexpected Pass: 0
> Skipped: 0
> Timeout: 0
>
> Full log written to /root/git/ndctl/build/meson-logs/testlog.txt
>
> It's not that cxl-labels.sh causes the error, it is loading and
> unloading the port driver trips over the problem below:
>
> >
> > Signed-off-by: Robert Richter <rrichter@amd.com>
> > Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> > ---
> > drivers/cxl/core/hdm.c | 64 +++++++++++++++++++++++-------------------
> > 1 file changed, 35 insertions(+), 29 deletions(-)
> >
> > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > index 4449b34a80cc..b0f59e63e0d2 100644
> > --- a/drivers/cxl/core/hdm.c
> > +++ b/drivers/cxl/core/hdm.c
> > @@ -81,26 +81,6 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
> > cxlhdm->interleave_mask |= GENMASK(14, 12);
> > }
> >
> > -static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
> > - struct cxl_component_regs *regs)
> > -{
> > - struct cxl_register_map map = {
> > - .dev = &port->dev,
> > - .resource = port->component_reg_phys,
> > - .base = crb,
> > - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
> > - };
> > -
> > - cxl_probe_component_regs(&port->dev, crb, &map.component_map);
> > - if (!map.component_map.hdm_decoder.valid) {
> > - dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
> > - /* unique error code to indicate no HDM decoder capability */
> > - return -ENODEV;
> > - }
> > -
> > - return cxl_map_component_regs(&map, regs, BIT(CXL_CM_CAP_CAP_ID_HDM));
> > -}
> > -
> > static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > {
> > struct cxl_hdm *cxlhdm;
> > @@ -145,6 +125,22 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > return true;
> > }
> >
> > +static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
> > +{
> > + /*
> > + * HDM capability applies to Endpoints, USPs and VH Host
> > + * Bridges. The Endpoint's component register mappings are
> > + * located in the cxlds.
> > + */
> > + if (is_cxl_endpoint(port)) {
> > + struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
> > +
> > + return &memdev->cxlds->comp_map;
>
> ...but why? The issue here is that the @dev argument in that map is the
> memdev parent PCI device. However, in this context the @dev for devm
> operations wants to be &port->dev.
The cxl_pci driver stores the comp_map of the endpoint in the cxlds
structure, struct cxl_port is not yet available at this point. See
patch #2 of this series ("cxl/pci: Store the endpoint's Component
Register mappings in struct cxl_dev_state").
>
> > + }
> > +
> > + return &port->comp_map;
>
> ...so this is fine, and folding in the following resolves the test
> failure.
>
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index b0f59e63e0d2..6f111f487795 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -125,22 +125,6 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> return true;
> }
>
> -static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
> -{
> - /*
> - * HDM capability applies to Endpoints, USPs and VH Host
> - * Bridges. The Endpoint's component register mappings are
> - * located in the cxlds.
> - */
> - if (is_cxl_endpoint(port)) {
> - struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
> -
> - return &memdev->cxlds->comp_map;
> - }
> -
> - return &port->comp_map;
> -}
> -
> /**
> * devm_cxl_setup_hdm - map HDM decoder component registers
> * @port: cxl_port to map
> @@ -160,8 +144,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
> cxlhdm->port = port;
> dev_set_drvdata(dev, cxlhdm);
>
> - comp_map = cxl_port_get_comp_map(port);
> -
> + comp_map = &port->comp_map;
Can you check if the following works instead, I think the
pre-initialization is missing in cxl_mock_mem_probe() for
cxl_test:
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index d6d067fbee97..4c4e33de4d74 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1333,6 +1333,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
mutex_init(&mds->mbox_mutex);
mutex_init(&mds->event.log_lock);
mds->cxlds.dev = dev;
+ mds->cxlds.comp_map.dev = dev;
+ mds->cxlds.comp_map.resource = CXL_RESOURCE_NONE;
mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
return mds;
--
2.30.2
The cxl_pci driver always has something valid or fails otherwise.
If that works the change should be merged into patch #2.
Thanks,
-Robert
> if (comp_map->resource == CXL_RESOURCE_NONE) {
> if (info && info->mem_enabled) {
> cxlhdm->decoder_count = info->ranges;
>
>
> Am I missing why the cxlds->comp_map needs to be reused?
>
> Note that I am out and may not be able to respond further until next
> week.
Robert Richter wrote:
> Dan,
>
> On 03.07.23 17:39:38, Dan Williams wrote:
> > Terry Bowman wrote:
> > > From: Robert Richter <rrichter@amd.com>
> > >
> > > Now, that the Component Register mappings are stored, use them to
> > > enable and map the HDM decoder capabilities. The Component Registers
> > > do not need to be probed again for this, remove probing code.
> > >
> > > The HDM capability applies to Endpoints, USPs and VH Host Bridges. The
> > > Endpoint's component register mappings are located in the cxlds and
> > > else in the port's structure. Provide a helper function
> > > cxl_port_get_comp_map() to locate the mappings depending on the
> > > component's type.
> >
> > This causes a regression that cxl_test tripped over. It's likely
> > something you could reproduce without cxl_test just be reloading the
> > cxl_port driver. Root cause below, but here is what I see on a test run:
> >
> > # meson test -C build --suite cxl
> > ninja: no work to do.
> > ninja: Entering directory `/root/git/ndctl/build'
> > [113/113] Linking target ndctl/ndctl
> > 1/6 ndctl:cxl / cxl-topology.sh OK 12.78s
> > 2/6 ndctl:cxl / cxl-region-sysfs.sh OK 7.72s
> > 3/6 ndctl:cxl / cxl-labels.sh FAIL 1.53s (exit status 129 or signal 1 SIGHUP)
> > >>> LD_LIBRARY_PATH=/root/git/ndctl/build/cxl/lib:/root/git/ndctl/build/ndctl/lib:/root/git/ndctl/build/daxctl/lib NDCTL=/root/git/ndctl/build/ndctl/ndctl DAXCTL=/root/git/ndctl/build/daxctl/daxctl MALLOC_PERTURB_=67 TEST_PATH=/root/git/ndctl/build/test DATA_PATH=/root/git/ndctl/test /bin/bash /root/git/ndctl/test/cxl-labels.sh
>
> I have tried various combinations of revisions and setups (patch #3,
> #5 and #14, ndctl:pending/v77, cxl_test only with both, "ACPI0017" and
> cxl_pci_probe() disabled), but could not reproduce this. Tests always
> pass for me:
Apologies for the long delay in getting back to this. July had some
major personal interrupts to attend.
>
> [51/51] Linking target ndctl/ndctl
> 1/6 ndctl:cxl / cxl-topology.sh OK 18.59s
> 2/6 ndctl:cxl / cxl-region-sysfs.sh OK 12.26s
> 3/6 ndctl:cxl / cxl-labels.sh OK 28.25s
> 4/6 ndctl:cxl / cxl-create-region.sh OK 19.56s
> 5/6 ndctl:cxl / cxl-xor-region.sh OK 10.57s
> 6/6 ndctl:cxl / cxl-security.sh OK 9.77s
>
> Ok: 6
> Expected Fail: 0
> Fail: 0
> Unexpected Pass: 0
> Skipped: 0
> Timeout: 0
>
> Full log written to /root/ndctl/build/meson-logs/testlog.txt
So it turns out it is not cxl_test that is failing instead it is the
tests noticing a regression of the VH base case. I am running
cxl-topology.sh in a QEMU environment with a local device defined, and
that local device is hitting a probe regression.
When cxl_test goes to count the expected disabled devices it sees one
more because the QEMU device is also disabled.
++ jq 'map(select(.state == "disabled")) | length'
+ count=5
+ (( count == 4 ))
+ err 170
++ basename /root/git/ndctl/test/cxl-topology.sh
+ echo test/cxl-topology.sh: failed at line 170
...i.e. only 4 devices are expected to be disabled, not the 5th one that
was not on the cxl_test bus. I assume you are running without any other
CXL devices?
I will look into making that a more formalized case so that failure is
not QEMU configuration dependent, but please make sure that QEMU CXL
configs do not regress.
> >
> > 4/6 ndctl:cxl / cxl-create-region.sh OK 17.05s
> > 5/6 ndctl:cxl / cxl-xor-region.sh OK 5.18s
> > 6/6 ndctl:cxl / cxl-security.sh OK 4.68s
> >
> > Ok: 5
> > Expected Fail: 0
> > Fail: 1
> > Unexpected Pass: 0
> > Skipped: 0
> > Timeout: 0
> >
> > Full log written to /root/git/ndctl/build/meson-logs/testlog.txt
> >
> > It's not that cxl-labels.sh causes the error, it is loading and
> > unloading the port driver trips over the problem below:
> >
> > >
> > > Signed-off-by: Robert Richter <rrichter@amd.com>
> > > Signed-off-by: Terry Bowman <terry.bowman@amd.com>
> > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
> > > Reviewed-by: Dave Jiang <dave.jiang@intel.com>
> > > ---
> > > drivers/cxl/core/hdm.c | 64 +++++++++++++++++++++++-------------------
> > > 1 file changed, 35 insertions(+), 29 deletions(-)
> > >
> > > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > > index 4449b34a80cc..b0f59e63e0d2 100644
> > > --- a/drivers/cxl/core/hdm.c
> > > +++ b/drivers/cxl/core/hdm.c
> > > @@ -81,26 +81,6 @@ static void parse_hdm_decoder_caps(struct cxl_hdm *cxlhdm)
> > > cxlhdm->interleave_mask |= GENMASK(14, 12);
> > > }
> > >
> > > -static int map_hdm_decoder_regs(struct cxl_port *port, void __iomem *crb,
> > > - struct cxl_component_regs *regs)
> > > -{
> > > - struct cxl_register_map map = {
> > > - .dev = &port->dev,
> > > - .resource = port->component_reg_phys,
> > > - .base = crb,
> > > - .max_size = CXL_COMPONENT_REG_BLOCK_SIZE,
> > > - };
> > > -
> > > - cxl_probe_component_regs(&port->dev, crb, &map.component_map);
> > > - if (!map.component_map.hdm_decoder.valid) {
> > > - dev_dbg(&port->dev, "HDM decoder registers not implemented\n");
> > > - /* unique error code to indicate no HDM decoder capability */
> > > - return -ENODEV;
> > > - }
> > > -
> > > - return cxl_map_component_regs(&map, regs, BIT(CXL_CM_CAP_CAP_ID_HDM));
> > > -}
> > > -
> > > static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > > {
> > > struct cxl_hdm *cxlhdm;
> > > @@ -145,6 +125,22 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > > return true;
> > > }
> > >
> > > +static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
> > > +{
> > > + /*
> > > + * HDM capability applies to Endpoints, USPs and VH Host
> > > + * Bridges. The Endpoint's component register mappings are
> > > + * located in the cxlds.
> > > + */
> > > + if (is_cxl_endpoint(port)) {
> > > + struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
> > > +
> > > + return &memdev->cxlds->comp_map;
> >
> > ...but why? The issue here is that the @dev argument in that map is the
> > memdev parent PCI device. However, in this context the @dev for devm
> > operations wants to be &port->dev.
>
> The cxl_pci driver stores the comp_map of the endpoint in the cxlds
> structure, struct cxl_port is not yet available at this point.
When you say "this point" you mean "that point" in cxl_pci, right? I
initially took that to mean literally "this" point in the quoted code
above.
> See patch #2 of this series ("cxl/pci: Store the endpoint's Component
> Register mappings in struct cxl_dev_state").
>
> >
> > > + }
> > > +
> > > + return &port->comp_map;
> >
> > ...so this is fine, and folding in the following resolves the test
> > failure.
> >
> > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > index b0f59e63e0d2..6f111f487795 100644
> > --- a/drivers/cxl/core/hdm.c
> > +++ b/drivers/cxl/core/hdm.c
> > @@ -125,22 +125,6 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > return true;
> > }
> >
> > -static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
> > -{
> > - /*
> > - * HDM capability applies to Endpoints, USPs and VH Host
> > - * Bridges. The Endpoint's component register mappings are
> > - * located in the cxlds.
> > - */
> > - if (is_cxl_endpoint(port)) {
> > - struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
> > -
> > - return &memdev->cxlds->comp_map;
> > - }
> > -
> > - return &port->comp_map;
> > -}
> > -
> > /**
> > * devm_cxl_setup_hdm - map HDM decoder component registers
> > * @port: cxl_port to map
> > @@ -160,8 +144,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
> > cxlhdm->port = port;
> > dev_set_drvdata(dev, cxlhdm);
> >
> > - comp_map = cxl_port_get_comp_map(port);
> > -
> > + comp_map = &port->comp_map;
>
> Can you check if the following works instead, I think the
> pre-initialization is missing in cxl_mock_mem_probe() for
> cxl_test:
>
>
> diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> index d6d067fbee97..4c4e33de4d74 100644
> --- a/drivers/cxl/core/mbox.c
> +++ b/drivers/cxl/core/mbox.c
> @@ -1333,6 +1333,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
> mutex_init(&mds->mbox_mutex);
> mutex_init(&mds->event.log_lock);
> mds->cxlds.dev = dev;
> + mds->cxlds.comp_map.dev = dev;
> + mds->cxlds.comp_map.resource = CXL_RESOURCE_NONE;
This has the same problem. @dev is specifying the lifetime of the
mapping. The lifetime of the mapping needs to be relative to the driver
using the registers. So if the cxl_port driver is mapping the component
registers the only valid device in the comp_map is &port->dev.
I notice that cxl_port_get_comp_map() endpoint port as an argument. That
endpoint port was instantiated with @cxlmd, but it seems not with
@cxlmd->cxlds->comp_map information which is available. Lets just fix
that. I.e. move devm_cxl_add_endpoint() into the core and make it
initialize @endpoint->comp_map with @cxlds->comp_map while switching
@dev in the @endpoint->comp_map to be @endpoint->dev, and not
@cxlds->dev.
...but it turns out that is the second bug in this patch. As I went to
try to reproduce this failure for a single port VH configuration I
notice another bug, a regression of this fix:
7bba261e0aa6 cxl/port: Scan single-target ports for decoders
...because there is no requirement for single port switches /
host-bridges to have HDM decoders per the specification, and the
original patch is turning HDM decoders not present as a hard failure.
> mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
>
> return mds;
> --
> 2.30.2
>
> The cxl_pci driver always has something valid or fails otherwise.
Understood, just copy that map at __devm_cxl_add_port() time. I am
thinking devm_cxl_add_endpoint() moves into core/port.c and it calls
__devm_cxl_add_port() directly with a new parameter to take a passed in
@comp_map or @component_reg_phys for the switch port case.
>
> If that works the change should be merged into patch #2.
>
> Thanks,
>
> -Robert
>
>
> > if (comp_map->resource == CXL_RESOURCE_NONE) {
> > if (info && info->mem_enabled) {
> > cxlhdm->decoder_count = info->ranges;
> >
> >
> > Am I missing why the cxlds->comp_map needs to be reused?
> >
> > Note that I am out and may not be able to respond further until next
> > week.
On 07.08.23 20:00:41, Dan Williams wrote:
> Robert Richter wrote:
> > On 03.07.23 17:39:38, Dan Williams wrote:
> > [51/51] Linking target ndctl/ndctl
> > 1/6 ndctl:cxl / cxl-topology.sh OK 18.59s
> > 2/6 ndctl:cxl / cxl-region-sysfs.sh OK 12.26s
> > 3/6 ndctl:cxl / cxl-labels.sh OK 28.25s
> > 4/6 ndctl:cxl / cxl-create-region.sh OK 19.56s
> > 5/6 ndctl:cxl / cxl-xor-region.sh OK 10.57s
> > 6/6 ndctl:cxl / cxl-security.sh OK 9.77s
> >
> > Ok: 6
> > Expected Fail: 0
> > Fail: 0
> > Unexpected Pass: 0
> > Skipped: 0
> > Timeout: 0
> >
> > Full log written to /root/ndctl/build/meson-logs/testlog.txt
>
> So it turns out it is not cxl_test that is failing instead it is the
> tests noticing a regression of the VH base case. I am running
> cxl-topology.sh in a QEMU environment with a local device defined, and
> that local device is hitting a probe regression.
>
> When cxl_test goes to count the expected disabled devices it sees one
> more because the QEMU device is also disabled.
>
> ++ jq 'map(select(.state == "disabled")) | length'
> + count=5
> + (( count == 4 ))
> + err 170
> ++ basename /root/git/ndctl/test/cxl-topology.sh
> + echo test/cxl-topology.sh: failed at line 170
>
> ...i.e. only 4 devices are expected to be disabled, not the 5th one that
> was not on the cxl_test bus. I assume you are running without any other
> CXL devices?
>
> I will look into making that a more formalized case so that failure is
> not QEMU configuration dependent, but please make sure that QEMU CXL
> configs do not regress.
I finally could reproduce and root cause the issue.
I have isolated this to the operation on the qemu mem0 device, without
cxl_test involved:
root@qemu:~# cxl list
[
{
"memdev":"mem0",
"pmem_size":536870912,
"serial":0,
"host":"0000:35:00.0"
}
]
root@qemu:~# echo mem0 >/sys/bus/cxl/drivers/cxl_mem/unbind
[ 65.957423] nvdimm nmem0: trace
[ 66.001986] nd_bus ndbus0: nvdimm.remove(nmem0)
[ 66.041053] cxl_mem mem0: disconnect mem0 from port1
root@qemu:~# echo mem0 >/sys/bus/cxl/drivers/cxl_mem/bind
[ 75.076881] cxl_mem mem0: scan: iter: mem0 dport_dev: 0000:34:00.0 parent: pci0000:34
[ 75.077349] cxl_mem mem0: found already registered port port1:pci0000:34
[ 75.077653] cxl_mem mem0: host-bridge: pci0000:34
[ 75.078003] cxl_pci 0000:35:00.0: Failed to request region 0x00000000fe841110-0x00000000fe84119f
[ 75.078415] cxl_port endpoint2: Failed to map HDM capability.
[ 75.078683] cxl_port endpoint2: probe: -12
[ 75.078911] cxl_port: probe of endpoint2 failed with error -12
[ 75.079223] cxl_mem mem0: endpoint2 added to port1
[ 75.079469] cxl_mem mem0: endpoint2 failed probe
[ 75.079706] cxl_mem mem0: probe: -6
[ 75.079943] cxl_mem mem0: disconnect mem0 from port1
bash: echo: write error: No such device or address
root@qemu:~# cxl list -i
[
{
"memdev":"mem0",
"pmem_size":536870912,
"serial":0,
"host":"0000:35:00.0",
"state":"disabled"
}
]
The issue causes ndctl cxl tests with the cxl_test module to fail too.
> > > > +static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
> > > > +{
> > > > + /*
> > > > + * HDM capability applies to Endpoints, USPs and VH Host
> > > > + * Bridges. The Endpoint's component register mappings are
> > > > + * located in the cxlds.
> > > > + */
> > > > + if (is_cxl_endpoint(port)) {
> > > > + struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
> > > > +
> > > > + return &memdev->cxlds->comp_map;
> > >
> > > ...but why? The issue here is that the @dev argument in that map is the
> > > memdev parent PCI device. However, in this context the @dev for devm
> > > operations wants to be &port->dev.
Right, I fixed this by changing the function i/f to pass @dev for the
devm operation. See below.
> >
> > The cxl_pci driver stores the comp_map of the endpoint in the cxlds
> > structure, struct cxl_port is not yet available at this point.
>
> When you say "this point" you mean "that point" in cxl_pci, right? I
> initially took that to mean literally "this" point in the quoted code
> above.
Yes, exactly. The mapping is in cxlds->comp_map, not port->comp_map.
>
> > See patch #2 of this series ("cxl/pci: Store the endpoint's Component
> > Register mappings in struct cxl_dev_state").
> >
> > >
> > > > + }
> > > > +
> > > > + return &port->comp_map;
> > >
> > > ...so this is fine, and folding in the following resolves the test
> > > failure.
> > >
> > > diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> > > index b0f59e63e0d2..6f111f487795 100644
> > > --- a/drivers/cxl/core/hdm.c
> > > +++ b/drivers/cxl/core/hdm.c
> > > @@ -125,22 +125,6 @@ static bool should_emulate_decoders(struct cxl_endpoint_dvsec_info *info)
> > > return true;
> > > }
> > >
> > > -static struct cxl_register_map *cxl_port_get_comp_map(struct cxl_port *port)
> > > -{
> > > - /*
> > > - * HDM capability applies to Endpoints, USPs and VH Host
> > > - * Bridges. The Endpoint's component register mappings are
> > > - * located in the cxlds.
> > > - */
> > > - if (is_cxl_endpoint(port)) {
> > > - struct cxl_memdev *memdev = to_cxl_memdev(port->uport_dev);
> > > -
> > > - return &memdev->cxlds->comp_map;
> > > - }
> > > -
> > > - return &port->comp_map;
> > > -}
> > > -
> > > /**
> > > * devm_cxl_setup_hdm - map HDM decoder component registers
> > > * @port: cxl_port to map
> > > @@ -160,8 +144,7 @@ struct cxl_hdm *devm_cxl_setup_hdm(struct cxl_port *port,
> > > cxlhdm->port = port;
> > > dev_set_drvdata(dev, cxlhdm);
> > >
> > > - comp_map = cxl_port_get_comp_map(port);
> > > -
> > > + comp_map = &port->comp_map;
> >
> > Can you check if the following works instead, I think the
> > pre-initialization is missing in cxl_mock_mem_probe() for
> > cxl_test:
> >
> >
> > diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
> > index d6d067fbee97..4c4e33de4d74 100644
> > --- a/drivers/cxl/core/mbox.c
> > +++ b/drivers/cxl/core/mbox.c
> > @@ -1333,6 +1333,8 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
> > mutex_init(&mds->mbox_mutex);
> > mutex_init(&mds->event.log_lock);
> > mds->cxlds.dev = dev;
> > + mds->cxlds.comp_map.dev = dev;
> > + mds->cxlds.comp_map.resource = CXL_RESOURCE_NONE;
>
> This has the same problem. @dev is specifying the lifetime of the
> mapping. The lifetime of the mapping needs to be relative to the driver
> using the registers. So if the cxl_port driver is mapping the component
> registers the only valid device in the comp_map is &port->dev.
>
> I notice that cxl_port_get_comp_map() endpoint port as an argument. That
> endpoint port was instantiated with @cxlmd, but it seems not with
> @cxlmd->cxlds->comp_map information which is available. Lets just fix
> that. I.e. move devm_cxl_add_endpoint() into the core and make it
> initialize @endpoint->comp_map with @cxlds->comp_map while switching
> @dev in the @endpoint->comp_map to be @endpoint->dev, and not
> @cxlds->dev.
That does not work as we need the RAS cap early once cxl_pci is bound.
There are multiple devices using the comp_map. For proper devm release
we need to assign different devices depending on the users. That is,
the RAS cap is released with cxl_pci driver unbinding and HDM with the
memdev release. The fix I have chosen is to expand
cxl_map_component_regs() to pass the devm device to release it
together with the device using it.
>
> ...but it turns out that is the second bug in this patch. As I went to
> try to reproduce this failure for a single port VH configuration I
> notice another bug, a regression of this fix:
>
> 7bba261e0aa6 cxl/port: Scan single-target ports for decoders
>
> ...because there is no requirement for single port switches /
> host-bridges to have HDM decoders per the specification, and the
> original patch is turning HDM decoders not present as a hard failure.
But if the HDM is not present, a -ENODEV is still returned?
cxl_switch_port_probe() does not fail then.
>
> > mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
> >
> > return mds;
> > --
> > 2.30.2
> >
> > The cxl_pci driver always has something valid or fails otherwise.
>
> Understood, just copy that map at __devm_cxl_add_port() time. I am
> thinking devm_cxl_add_endpoint() moves into core/port.c and it calls
> __devm_cxl_add_port() directly with a new parameter to take a passed in
> @comp_map or @component_reg_phys for the switch port case.
Please take a look at the cxl_map_component_regs() update in v9. We
will send it out today. The changes compared to v8 are
straightforward.
Thanks,
-Robert
>
> >
> > If that works the change should be merged into patch #2.
> >
> > Thanks,
> >
> > -Robert
> >
> >
> > > if (comp_map->resource == CXL_RESOURCE_NONE) {
> > > if (info && info->mem_enabled) {
> > > cxlhdm->decoder_count = info->ranges;
> > >
> > >
> > > Am I missing why the cxlds->comp_map needs to be reused?
> > >
> > > Note that I am out and may not be able to respond further until next
> > > week.
>
>
© 2016 - 2026 Red Hat, Inc.