[PATCH V3 13/20] cxl/mem: Refactor cxl pmem region auto-assembling

Neeraj Kumar posted 20 patches 4 months, 3 weeks ago
There is a newer version of this series
[PATCH V3 13/20] cxl/mem: Refactor cxl pmem region auto-assembling
Posted by Neeraj Kumar 4 months, 3 weeks ago
In 84ec985944ef3, devm_cxl_add_nvdimm() sequence was changed and called
before devm_cxl_add_endpoint(). It's because cxl pmem region auto-assembly
used to get called at last in cxl_endpoint_port_probe(), which requires
cxl_nvd presence.

For cxl region persistency, region creation happens during nvdimm_probe
which need the completion of endpoint probe.

In order to accommodate both cxl pmem region auto-assembly and cxl region
persistency, refactored following

1. Re-Sequence devm_cxl_add_nvdimm() after devm_cxl_add_endpoint(). This
   will be called only after successful completion of endpoint probe.

2. Moved cxl pmem region auto-assembly from cxl_endpoint_port_probe() to
   cxl_mem_probe() after devm_cxl_add_nvdimm(). It gurantees both the
   completion of endpoint probe and cxl_nvd presence before its call.

Signed-off-by: Neeraj Kumar <s.neeraj@samsung.com>
---
 drivers/cxl/core/region.c | 33 +++++++++++++++++++++++++++++++++
 drivers/cxl/cxl.h         |  4 ++++
 drivers/cxl/mem.c         | 24 +++++++++++++++---------
 drivers/cxl/port.c        | 39 +--------------------------------------
 4 files changed, 53 insertions(+), 47 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 7a0cead24490..c325aa827992 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -3606,6 +3606,39 @@ int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
 
+static int discover_region(struct device *dev, void *unused)
+{
+	struct cxl_endpoint_decoder *cxled;
+	int rc;
+
+	if (!is_endpoint_decoder(dev))
+		return 0;
+
+	cxled = to_cxl_endpoint_decoder(dev);
+	if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
+		return 0;
+
+	if (cxled->state != CXL_DECODER_STATE_AUTO)
+		return 0;
+
+	/*
+	 * Region enumeration is opportunistic, if this add-event fails,
+	 * continue to the next endpoint decoder.
+	 */
+	rc = cxl_add_to_region(cxled);
+	if (rc)
+		dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
+			cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
+
+	return 0;
+}
+
+void cxl_region_discovery(struct cxl_port *port)
+{
+	device_for_each_child(&port->dev, NULL, discover_region);
+}
+EXPORT_SYMBOL_NS_GPL(cxl_region_discovery, "CXL");
+
 u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
 {
 	struct cxl_region_ref *iter;
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 4fe3df06f57a..b57597e55f7e 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -873,6 +873,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
 int cxl_add_to_region(struct cxl_endpoint_decoder *cxled);
 struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
 u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa);
+void cxl_region_discovery(struct cxl_port *port);
 #else
 static inline bool is_cxl_pmem_region(struct device *dev)
 {
@@ -895,6 +896,9 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
 {
 	return 0;
 }
+static inline void cxl_region_discovery(struct cxl_port *port)
+{
+}
 #endif
 
 void cxl_endpoint_parse_cdat(struct cxl_port *port);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 6e6777b7bafb..54501616ff09 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -152,15 +152,6 @@ static int cxl_mem_probe(struct device *dev)
 		return -ENXIO;
 	}
 
-	if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
-		rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
-		if (rc) {
-			if (rc == -ENODEV)
-				dev_info(dev, "PMEM disabled by platform\n");
-			return rc;
-		}
-	}
-
 	if (dport->rch)
 		endpoint_parent = parent_port->uport_dev;
 	else
@@ -184,6 +175,21 @@ static int cxl_mem_probe(struct device *dev)
 	if (rc)
 		dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc);
 
+	if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
+		rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
+		if (rc) {
+			if (rc == -ENODEV)
+				dev_info(dev, "PMEM disabled by platform\n");
+			return rc;
+		}
+	}
+
+	/*
+	 * Now that all endpoint decoders are successfully enumerated, try to
+	 * assemble region autodiscovery from committed decoders.
+	 */
+	cxl_region_discovery(cxlmd->endpoint);
+
 	/*
 	 * The kernel may be operating out of CXL memory on this device,
 	 * there is no spec defined way to determine whether this device
diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
index cf32dc50b7a6..07bb909b7d2e 100644
--- a/drivers/cxl/port.c
+++ b/drivers/cxl/port.c
@@ -30,33 +30,6 @@ static void schedule_detach(void *cxlmd)
 	schedule_cxl_memdev_detach(cxlmd);
 }
 
-static int discover_region(struct device *dev, void *unused)
-{
-	struct cxl_endpoint_decoder *cxled;
-	int rc;
-
-	if (!is_endpoint_decoder(dev))
-		return 0;
-
-	cxled = to_cxl_endpoint_decoder(dev);
-	if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
-		return 0;
-
-	if (cxled->state != CXL_DECODER_STATE_AUTO)
-		return 0;
-
-	/*
-	 * Region enumeration is opportunistic, if this add-event fails,
-	 * continue to the next endpoint decoder.
-	 */
-	rc = cxl_add_to_region(cxled);
-	if (rc)
-		dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
-			cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
-
-	return 0;
-}
-
 static int cxl_switch_port_probe(struct cxl_port *port)
 {
 	struct cxl_hdm *cxlhdm;
@@ -121,17 +94,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
 	if (rc)
 		return rc;
 
-	rc = devm_cxl_enumerate_decoders(cxlhdm, &info);
-	if (rc)
-		return rc;
-
-	/*
-	 * Now that all endpoint decoders are successfully enumerated, try to
-	 * assemble regions from committed decoders
-	 */
-	device_for_each_child(&port->dev, NULL, discover_region);
-
-	return 0;
+	return devm_cxl_enumerate_decoders(cxlhdm, &info);
 }
 
 static int cxl_port_probe(struct device *dev)
-- 
2.34.1
Re: [PATCH V3 13/20] cxl/mem: Refactor cxl pmem region auto-assembling
Posted by Dave Jiang 4 months, 2 weeks ago

On 9/17/25 6:41 AM, Neeraj Kumar wrote:
> In 84ec985944ef3, devm_cxl_add_nvdimm() sequence was changed and called
> before devm_cxl_add_endpoint(). It's because cxl pmem region auto-assembly
> used to get called at last in cxl_endpoint_port_probe(), which requires
> cxl_nvd presence.
> 
> For cxl region persistency, region creation happens during nvdimm_probe
> which need the completion of endpoint probe.
> 
> In order to accommodate both cxl pmem region auto-assembly and cxl region
> persistency, refactored following
> 
> 1. Re-Sequence devm_cxl_add_nvdimm() after devm_cxl_add_endpoint(). This
>    will be called only after successful completion of endpoint probe.
> 
> 2. Moved cxl pmem region auto-assembly from cxl_endpoint_port_probe() to
>    cxl_mem_probe() after devm_cxl_add_nvdimm(). It gurantees both the
>    completion of endpoint probe and cxl_nvd presence before its call.

Given that we are going forward with this implementation [1] from Dan and drivers like the type2 enabling are going to be using it as well, can you please consider converting this change to Dan's mechanism instead of creating a whole new one?

I think the region discovery can be done via the ops->probe() callback. Thanks.

[1]: https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/commit/?h=for-6.18/cxl-probe-order&id=88aec5ea7a24da00dc92c7778df4851fe4fd3ec6

DJ

> 
> Signed-off-by: Neeraj Kumar <s.neeraj@samsung.com>
> ---
>  drivers/cxl/core/region.c | 33 +++++++++++++++++++++++++++++++++
>  drivers/cxl/cxl.h         |  4 ++++
>  drivers/cxl/mem.c         | 24 +++++++++++++++---------
>  drivers/cxl/port.c        | 39 +--------------------------------------
>  4 files changed, 53 insertions(+), 47 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 7a0cead24490..c325aa827992 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -3606,6 +3606,39 @@ int cxl_add_to_region(struct cxl_endpoint_decoder *cxled)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_add_to_region, "CXL");
>  
> +static int discover_region(struct device *dev, void *unused)
> +{
> +	struct cxl_endpoint_decoder *cxled;
> +	int rc;
> +
> +	if (!is_endpoint_decoder(dev))
> +		return 0;
> +
> +	cxled = to_cxl_endpoint_decoder(dev);
> +	if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
> +		return 0;
> +
> +	if (cxled->state != CXL_DECODER_STATE_AUTO)
> +		return 0;
> +
> +	/*
> +	 * Region enumeration is opportunistic, if this add-event fails,
> +	 * continue to the next endpoint decoder.
> +	 */
> +	rc = cxl_add_to_region(cxled);
> +	if (rc)
> +		dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
> +			cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
> +
> +	return 0;
> +}
> +
> +void cxl_region_discovery(struct cxl_port *port)
> +{
> +	device_for_each_child(&port->dev, NULL, discover_region);
> +}
> +EXPORT_SYMBOL_NS_GPL(cxl_region_discovery, "CXL");
> +
>  u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
>  {
>  	struct cxl_region_ref *iter;
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 4fe3df06f57a..b57597e55f7e 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -873,6 +873,7 @@ struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev);
>  int cxl_add_to_region(struct cxl_endpoint_decoder *cxled);
>  struct cxl_dax_region *to_cxl_dax_region(struct device *dev);
>  u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa);
> +void cxl_region_discovery(struct cxl_port *port);
>  #else
>  static inline bool is_cxl_pmem_region(struct device *dev)
>  {
> @@ -895,6 +896,9 @@ static inline u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint,
>  {
>  	return 0;
>  }
> +static inline void cxl_region_discovery(struct cxl_port *port)
> +{
> +}
>  #endif
>  
>  void cxl_endpoint_parse_cdat(struct cxl_port *port);
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 6e6777b7bafb..54501616ff09 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -152,15 +152,6 @@ static int cxl_mem_probe(struct device *dev)
>  		return -ENXIO;
>  	}
>  
> -	if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
> -		rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
> -		if (rc) {
> -			if (rc == -ENODEV)
> -				dev_info(dev, "PMEM disabled by platform\n");
> -			return rc;
> -		}
> -	}
> -
>  	if (dport->rch)
>  		endpoint_parent = parent_port->uport_dev;
>  	else
> @@ -184,6 +175,21 @@ static int cxl_mem_probe(struct device *dev)
>  	if (rc)
>  		dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc);
>  
> +	if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
> +		rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
> +		if (rc) {
> +			if (rc == -ENODEV)
> +				dev_info(dev, "PMEM disabled by platform\n");
> +			return rc;
> +		}
> +	}
> +
> +	/*
> +	 * Now that all endpoint decoders are successfully enumerated, try to
> +	 * assemble region autodiscovery from committed decoders.
> +	 */
> +	cxl_region_discovery(cxlmd->endpoint);
> +
>  	/*
>  	 * The kernel may be operating out of CXL memory on this device,
>  	 * there is no spec defined way to determine whether this device
> diff --git a/drivers/cxl/port.c b/drivers/cxl/port.c
> index cf32dc50b7a6..07bb909b7d2e 100644
> --- a/drivers/cxl/port.c
> +++ b/drivers/cxl/port.c
> @@ -30,33 +30,6 @@ static void schedule_detach(void *cxlmd)
>  	schedule_cxl_memdev_detach(cxlmd);
>  }
>  
> -static int discover_region(struct device *dev, void *unused)
> -{
> -	struct cxl_endpoint_decoder *cxled;
> -	int rc;
> -
> -	if (!is_endpoint_decoder(dev))
> -		return 0;
> -
> -	cxled = to_cxl_endpoint_decoder(dev);
> -	if ((cxled->cxld.flags & CXL_DECODER_F_ENABLE) == 0)
> -		return 0;
> -
> -	if (cxled->state != CXL_DECODER_STATE_AUTO)
> -		return 0;
> -
> -	/*
> -	 * Region enumeration is opportunistic, if this add-event fails,
> -	 * continue to the next endpoint decoder.
> -	 */
> -	rc = cxl_add_to_region(cxled);
> -	if (rc)
> -		dev_dbg(dev, "failed to add to region: %#llx-%#llx\n",
> -			cxled->cxld.hpa_range.start, cxled->cxld.hpa_range.end);
> -
> -	return 0;
> -}
> -
>  static int cxl_switch_port_probe(struct cxl_port *port)
>  {
>  	struct cxl_hdm *cxlhdm;
> @@ -121,17 +94,7 @@ static int cxl_endpoint_port_probe(struct cxl_port *port)
>  	if (rc)
>  		return rc;
>  
> -	rc = devm_cxl_enumerate_decoders(cxlhdm, &info);
> -	if (rc)
> -		return rc;
> -
> -	/*
> -	 * Now that all endpoint decoders are successfully enumerated, try to
> -	 * assemble regions from committed decoders
> -	 */
> -	device_for_each_child(&port->dev, NULL, discover_region);
> -
> -	return 0;
> +	return devm_cxl_enumerate_decoders(cxlhdm, &info);
>  }
>  
>  static int cxl_port_probe(struct device *dev)
Re: [PATCH V3 13/20] cxl/mem: Refactor cxl pmem region auto-assembling
Posted by Neeraj Kumar 4 months, 1 week ago
On 23/09/25 03:37PM, Dave Jiang wrote:
>
>
>On 9/17/25 6:41 AM, Neeraj Kumar wrote:
>> In 84ec985944ef3, devm_cxl_add_nvdimm() sequence was changed and called
>> before devm_cxl_add_endpoint(). It's because cxl pmem region auto-assembly
>> used to get called at last in cxl_endpoint_port_probe(), which requires
>> cxl_nvd presence.
>>
>> For cxl region persistency, region creation happens during nvdimm_probe
>> which need the completion of endpoint probe.
>>
>> In order to accommodate both cxl pmem region auto-assembly and cxl region
>> persistency, refactored following
>>
>> 1. Re-Sequence devm_cxl_add_nvdimm() after devm_cxl_add_endpoint(). This
>>    will be called only after successful completion of endpoint probe.
>>
>> 2. Moved cxl pmem region auto-assembly from cxl_endpoint_port_probe() to
>>    cxl_mem_probe() after devm_cxl_add_nvdimm(). It gurantees both the
>>    completion of endpoint probe and cxl_nvd presence before its call.
>
>Given that we are going forward with this implementation [1] from Dan and drivers like the type2 enabling are going to be using it as well, can you please consider converting this change to Dan's mechanism instead of creating a whole new one?
>
>I think the region discovery can be done via the ops->probe() callback. Thanks.
>
>[1]: https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/commit/?h=for-6.18/cxl-probe-order&id=88aec5ea7a24da00dc92c7778df4851fe4fd3ec6
>
>DJ
>

Sure, Let me revisit this.
It seems [1] is there in seperate branch "for-6.18/cxl-probe-order", and not yet merged into next, right?


Regards,
Neeraj
Re: [PATCH V3 13/20] cxl/mem: Refactor cxl pmem region auto-assembling
Posted by Dave Jiang 4 months ago

On 9/29/25 6:30 AM, Neeraj Kumar wrote:
> On 23/09/25 03:37PM, Dave Jiang wrote:
>>
>>
>> On 9/17/25 6:41 AM, Neeraj Kumar wrote:
>>> In 84ec985944ef3, devm_cxl_add_nvdimm() sequence was changed and called
>>> before devm_cxl_add_endpoint(). It's because cxl pmem region auto-assembly
>>> used to get called at last in cxl_endpoint_port_probe(), which requires
>>> cxl_nvd presence.
>>>
>>> For cxl region persistency, region creation happens during nvdimm_probe
>>> which need the completion of endpoint probe.
>>>
>>> In order to accommodate both cxl pmem region auto-assembly and cxl region
>>> persistency, refactored following
>>>
>>> 1. Re-Sequence devm_cxl_add_nvdimm() after devm_cxl_add_endpoint(). This
>>>    will be called only after successful completion of endpoint probe.
>>>
>>> 2. Moved cxl pmem region auto-assembly from cxl_endpoint_port_probe() to
>>>    cxl_mem_probe() after devm_cxl_add_nvdimm(). It gurantees both the
>>>    completion of endpoint probe and cxl_nvd presence before its call.
>>
>> Given that we are going forward with this implementation [1] from Dan and drivers like the type2 enabling are going to be using it as well, can you please consider converting this change to Dan's mechanism instead of creating a whole new one?
>>
>> I think the region discovery can be done via the ops->probe() callback. Thanks.
>>
>> [1]: https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/commit/?h=for-6.18/cxl-probe-order&id=88aec5ea7a24da00dc92c7778df4851fe4fd3ec6
>>
>> DJ
>>
> 
> Sure, Let me revisit this.
> It seems [1] is there in seperate branch "for-6.18/cxl-probe-order", and not yet merged into next, right?

Right. I believe Smita and Alejandro are using that as well. Depending on who gets there first. We can setup an immutable branch at some point.

[1]: https://lore.kernel.org/linux-cxl/20250822034202.26896-1-Smita.KoralahalliChannabasappa@amd.com/T/#t

DJ

> 
> 
> Regards,
> Neeraj
> 

Re: [PATCH V3 13/20] cxl/mem: Refactor cxl pmem region auto-assembling
Posted by Neeraj Kumar 3 months ago
On 06/10/25 08:55AM, Dave Jiang wrote:
>
>
>On 9/29/25 6:30 AM, Neeraj Kumar wrote:
>> On 23/09/25 03:37PM, Dave Jiang wrote:
>>>
>>>
>>> On 9/17/25 6:41 AM, Neeraj Kumar wrote:
>>>> In 84ec985944ef3, devm_cxl_add_nvdimm() sequence was changed and called
>>>> before devm_cxl_add_endpoint(). It's because cxl pmem region auto-assembly
>>>> used to get called at last in cxl_endpoint_port_probe(), which requires
>>>> cxl_nvd presence.
>>>>
>>>> For cxl region persistency, region creation happens during nvdimm_probe
>>>> which need the completion of endpoint probe.
>>>>
>>>> In order to accommodate both cxl pmem region auto-assembly and cxl region
>>>> persistency, refactored following
>>>>
>>>> 1. Re-Sequence devm_cxl_add_nvdimm() after devm_cxl_add_endpoint(). This
>>>>    will be called only after successful completion of endpoint probe.
>>>>
>>>> 2. Moved cxl pmem region auto-assembly from cxl_endpoint_port_probe() to
>>>>    cxl_mem_probe() after devm_cxl_add_nvdimm(). It gurantees both the
>>>>    completion of endpoint probe and cxl_nvd presence before its call.
>>>
>>> Given that we are going forward with this implementation [1] from Dan and drivers like the type2 enabling are going to be using it as well, can you please consider converting this change to Dan's mechanism instead of creating a whole new one?
>>>
>>> I think the region discovery can be done via the ops->probe() callback. Thanks.
>>>
>>> [1]: https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/commit/?h=for-6.18/cxl-probe-order&id=88aec5ea7a24da00dc92c7778df4851fe4fd3ec6
>>>
>>> DJ
>>>
>>
>> Sure, Let me revisit this.
>> It seems [1] is there in seperate branch "for-6.18/cxl-probe-order", and not yet merged into next, right?
>
>Right. I believe Smita and Alejandro are using that as well. Depending on who gets there first. We can setup an immutable branch at some point.
>
>[1]: https://lore.kernel.org/linux-cxl/20250822034202.26896-1-Smita.KoralahalliChannabasappa@amd.com/T/#t
>
>DJ

Hi Dave,

As per Dan's [1] newly introduced infra, Following is my understanding.

Currently cxl_pci does not care about the attach state of the cxl_memdev
because all generic memory expansion functionality can be handled by the
cxl_core. For accelerators, the driver needs to know and perform driver
specific initialization if CXL is available, or exectute a fallback to PCIe
only operation.

Dan's new infra is needed for CXL accelerator device drivers that need to
make decisions about enabling CXL dependent functionality in the device, or
falling back to PCIe-only operation.

During cxl_pci_probe() we call devm_cxl_add_memdev(struct cxl_memdev_ops *ops)
where function pointer as ops gets registered which gets called in cxl_mem_probe()
using cxlmd->ops->probe()

The probe callback runs after the port topology is successfully attached for
the given memdev.

So to use this infra we have to pass cxl_region_discovery() as ops parameter
of devm_cxl_add_memdev() getting called from cxl_pci_probe().
  
In this patch-set cxl_region_discovery() signature is different from cxlmd->ops->probe()

    {{{
	void cxl_region_discovery(struct cxl_port *port)
	{
         	device_for_each_child(&port->dev, NULL, discover_region);
	}

	struct cxl_memdev_ops {
	        int (*probe)(struct cxl_memdev *cxlmd);
	};
    }}}

Even after changing the signature of cxl_region_discovery() as per cxlmd->ops->probe()
may create problem as when the ops->probe() fails, then it will halts the probe sequence
of cxl_pci_probe()

It is because discover_region() may fail if two memdevs are participating into one region

Also, region auto assembly is mandatory functionality which creates region
if (cxled->state == CXL_DECODER_STATE_AUTO) gets satisfied.

Currently region auto assembly (added by a32320b71f085) happens after successfull
enumeration of endpoint decoders at cxl_endpoint_port_probe(), which I have moved at
cxl_mem_probe() after devm_cxl_add_nvdimm() which prepares cxl_nvd infra required by it.

As discussed in [1], this patch-set does the movement of auto region assembly from
cxl_endpoint_port_probe() to cxl_mem_probe() and resolved the conflicting dependency
of cxl_nvd infra required by both region creation using LSA and auto region assembly.

[1]: https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/commit/?h=for-6.18/cxl-probe-order&id=88aec5ea7a24da00dc92c7778df4851fe4fd3ec6 
[2]: https://lore.kernel.org/linux-cxl/1931444790.41752909482841.JavaMail.epsvc@epcpadp2new/

Please let me know if my understanding is correct or I am missing something?


Regards,
Neeraj

Re: [PATCH V3 13/20] cxl/mem: Refactor cxl pmem region auto-assembling
Posted by Dave Jiang 2 months, 3 weeks ago

On 11/7/25 5:39 AM, Neeraj Kumar wrote:
> On 06/10/25 08:55AM, Dave Jiang wrote:
>>
>>
>> On 9/29/25 6:30 AM, Neeraj Kumar wrote:
>>> On 23/09/25 03:37PM, Dave Jiang wrote:
>>>>
>>>>
>>>> On 9/17/25 6:41 AM, Neeraj Kumar wrote:
>>>>> In 84ec985944ef3, devm_cxl_add_nvdimm() sequence was changed and called
>>>>> before devm_cxl_add_endpoint(). It's because cxl pmem region auto-assembly
>>>>> used to get called at last in cxl_endpoint_port_probe(), which requires
>>>>> cxl_nvd presence.
>>>>>
>>>>> For cxl region persistency, region creation happens during nvdimm_probe
>>>>> which need the completion of endpoint probe.
>>>>>
>>>>> In order to accommodate both cxl pmem region auto-assembly and cxl region
>>>>> persistency, refactored following
>>>>>
>>>>> 1. Re-Sequence devm_cxl_add_nvdimm() after devm_cxl_add_endpoint(). This
>>>>>    will be called only after successful completion of endpoint probe.
>>>>>
>>>>> 2. Moved cxl pmem region auto-assembly from cxl_endpoint_port_probe() to
>>>>>    cxl_mem_probe() after devm_cxl_add_nvdimm(). It gurantees both the
>>>>>    completion of endpoint probe and cxl_nvd presence before its call.
>>>>
>>>> Given that we are going forward with this implementation [1] from Dan and drivers like the type2 enabling are going to be using it as well, can you please consider converting this change to Dan's mechanism instead of creating a whole new one?
>>>>
>>>> I think the region discovery can be done via the ops->probe() callback. Thanks.
>>>>
>>>> [1]: https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/commit/?h=for-6.18/cxl-probe-order&id=88aec5ea7a24da00dc92c7778df4851fe4fd3ec6
>>>>
>>>> DJ
>>>>
>>>
>>> Sure, Let me revisit this.
>>> It seems [1] is there in seperate branch "for-6.18/cxl-probe-order", and not yet merged into next, right?
>>
>> Right. I believe Smita and Alejandro are using that as well. Depending on who gets there first. We can setup an immutable branch at some point.
>>
>> [1]: https://lore.kernel.org/linux-cxl/20250822034202.26896-1-Smita.KoralahalliChannabasappa@amd.com/T/#t
>>
>> DJ
> 
> Hi Dave,
> 
> As per Dan's [1] newly introduced infra, Following is my understanding.
> 
> Currently cxl_pci does not care about the attach state of the cxl_memdev
> because all generic memory expansion functionality can be handled by the
> cxl_core. For accelerators, the driver needs to know and perform driver
> specific initialization if CXL is available, or exectute a fallback to PCIe
> only operation.
> 
> Dan's new infra is needed for CXL accelerator device drivers that need to
> make decisions about enabling CXL dependent functionality in the device, or
> falling back to PCIe-only operation.
> 
> During cxl_pci_probe() we call devm_cxl_add_memdev(struct cxl_memdev_ops *ops)
> where function pointer as ops gets registered which gets called in cxl_mem_probe()
> using cxlmd->ops->probe()
> 
> The probe callback runs after the port topology is successfully attached for
> the given memdev.
> 
> So to use this infra we have to pass cxl_region_discovery() as ops parameter
> of devm_cxl_add_memdev() getting called from cxl_pci_probe().
>  
> In this patch-set cxl_region_discovery() signature is different from cxlmd->ops->probe()
> 
>    {{{
>     void cxl_region_discovery(struct cxl_port *port)
>     {
>             device_for_each_child(&port->dev, NULL, discover_region);
>     }
> 
>     struct cxl_memdev_ops {
>             int (*probe)(struct cxl_memdev *cxlmd);
>     };
>    }}}
> 
> Even after changing the signature of cxl_region_discovery() as per cxlmd->ops->probe()
> may create problem as when the ops->probe() fails, then it will halts the probe sequence
> of cxl_pci_probe()
> 
> It is because discover_region() may fail if two memdevs are participating into one region

While discover_region() may fail, the return value is ignored. The current code disregards failures from device_for_each_child(). And also above, cxl_region_discovery() returns void. So I don't follow how ops->probe() would fail if we ignore errors from discover_region().

DJ

> 
> Also, region auto assembly is mandatory functionality which creates region
> if (cxled->state == CXL_DECODER_STATE_AUTO) gets satisfied.
> 
> Currently region auto assembly (added by a32320b71f085) happens after successfull
> enumeration of endpoint decoders at cxl_endpoint_port_probe(), which I have moved at
> cxl_mem_probe() after devm_cxl_add_nvdimm() which prepares cxl_nvd infra required by it.
> 
> As discussed in [1], this patch-set does the movement of auto region assembly from
> cxl_endpoint_port_probe() to cxl_mem_probe() and resolved the conflicting dependency
> of cxl_nvd infra required by both region creation using LSA and auto region assembly.
> 
> [1]: https://git.kernel.org/pub/scm/linux/kernel/git/cxl/cxl.git/commit/?h=for-6.18/cxl-probe-order&id=88aec5ea7a24da00dc92c7778df4851fe4fd3ec6 [2]: https://lore.kernel.org/linux-cxl/1931444790.41752909482841.JavaMail.epsvc@epcpadp2new/
> 
> Please let me know if my understanding is correct or I am missing something?
> 
> 
> Regards,
> Neeraj
> 
>
Re: [PATCH V3 13/20] cxl/mem: Refactor cxl pmem region auto-assembling
Posted by Neeraj Kumar 2 months, 3 weeks ago
On 12/11/25 08:55AM, Dave Jiang wrote:
>
>

<snip>

>> During cxl_pci_probe() we call devm_cxl_add_memdev(struct cxl_memdev_ops *ops)
>> where function pointer as ops gets registered which gets called in cxl_mem_probe()
>> using cxlmd->ops->probe()
>>
>> The probe callback runs after the port topology is successfully attached for
>> the given memdev.
>>
>> So to use this infra we have to pass cxl_region_discovery() as ops parameter
>> of devm_cxl_add_memdev() getting called from cxl_pci_probe().
>>  
>> In this patch-set cxl_region_discovery() signature is different from cxlmd->ops->probe()
>>
>>    {{{
>>     void cxl_region_discovery(struct cxl_port *port)
>>     {
>>             device_for_each_child(&port->dev, NULL, discover_region);
>>     }
>>
>>     struct cxl_memdev_ops {
>>             int (*probe)(struct cxl_memdev *cxlmd);
>>     };
>>    }}}
>>
>> Even after changing the signature of cxl_region_discovery() as per cxlmd->ops->probe()
>> may create problem as when the ops->probe() fails, then it will halts the probe sequence
>> of cxl_pci_probe()
>>
>> It is because discover_region() may fail if two memdevs are participating into one region
>
>While discover_region() may fail, the return value is ignored. The current code disregards failures from device_for_each_child(). And also above, cxl_region_discovery() returns void. So I don't follow how ops->probe() would fail if we ignore errors from discover_region().
>
>DJ

Hi Dave,

Yes, you are correct. We can just change signature of cxl_region_discovery() as per
cxlmd->ops->probe(), anyway we are ignoring errors from discover_region().
With this change we can directly register cxl_region_discovery() with
devm_cxl_add_memdev(struct cxl_memdev_ops *ops) during pci_probe() using Dan's Infra.

I will use this new infra for region auto-assembling and share the v4 series shortly.


Regards,
Neeraj