[PATCH v4 14/28] cxl/port: Add endpoint decoder DC mode support to sysfs

Ira Weiny posted 28 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH v4 14/28] cxl/port: Add endpoint decoder DC mode support to sysfs
Posted by ira.weiny@intel.com 1 month, 2 weeks ago
From: Navneet Singh <navneet.singh@intel.com>

Endpoint decoder mode is used to represent the partition the decoder
points to such as ram or pmem.

Expand the mode to allow a decoder to point to a specific DC partition
(Region).

Signed-off-by: Navneet Singh <navneet.singh@intel.com>
Co-developed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>

---
Changes:
[iweiny: prevent creation of region on shareable DC partitions]
[Fan: change mode range logic]
[Fan: use !resource_size()]
[djiang: use the static mode name string array in mode_store()]
[Jonathan: remove rc check from mode to region index]
[Jonathan: clarify decoder mode 'mixed']
[djbw: drop cleanup patch and just follow the convention in cxl_dpa_set_mode()]
[fan: make dcd resource size check similar to other partitions]
[djbw, jonathan, fan: remove mode range check from dc_mode_to_region_index]
[iweiny: push sysfs versions to 6.12]
---
 Documentation/ABI/testing/sysfs-bus-cxl | 21 ++++++++++----------
 drivers/cxl/core/hdm.c                  | 17 ++++++++++++++++
 drivers/cxl/core/port.c                 | 10 +++++-----
 drivers/cxl/cxl.h                       | 35 ++++++++++++++++++---------------
 4 files changed, 52 insertions(+), 31 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index b865eefdb74c..661dab99183f 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -361,23 +361,24 @@ Description:
 
 
 What:		/sys/bus/cxl/devices/decoderX.Y/mode
-Date:		May, 2022
-KernelVersion:	v6.0
+Date:		May, 2022, October 2024
+KernelVersion:	v6.0, v6.12 (dcY)
 Contact:	linux-cxl@vger.kernel.org
 Description:
 		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
 		translates from a host physical address range, to a device local
 		address range. Device-local address ranges are further split
-		into a 'ram' (volatile memory) range and 'pmem' (persistent
-		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
-		'mixed', or 'none'. The 'mixed' indication is for error cases
-		when a decoder straddles the volatile/persistent partition
-		boundary, and 'none' indicates the decoder is not actively
-		decoding, or no DPA allocation policy has been set.
+		into a 'ram' (volatile memory) range, 'pmem' (persistent
+		memory) range, or Dynamic Capacity (DC) range. The 'mode'
+		attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
+		'none'. The 'mixed' indication is for error cases when a
+		decoder straddles partition boundaries, and 'none' indicates
+		the decoder is not actively decoding, or no DPA allocation
+		policy has been set.
 
 		'mode' can be written, when the decoder is in the 'disabled'
-		state, with either 'ram' or 'pmem' to set the boundaries for the
-		next allocation.
+		state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
+		the next allocation.
 
 
 What:		/sys/bus/cxl/devices/decoderX.Y/dpa_resource
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 8c7f941eaba1..b368babb55d9 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -551,6 +551,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
 	switch (mode) {
 	case CXL_DECODER_RAM:
 	case CXL_DECODER_PMEM:
+	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
 		break;
 	default:
 		dev_dbg(dev, "unsupported mode: %d\n", mode);
@@ -578,6 +579,22 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
 		goto out;
 	}
 
+	if (mode >= CXL_DECODER_DC0 && mode <= CXL_DECODER_DC7) {
+		struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+
+		rc = dc_mode_to_region_index(mode);
+		if (!resource_size(&cxlds->dc_res[rc])) {
+			dev_dbg(dev, "no available dynamic capacity\n");
+			rc = -ENXIO;
+			goto out;
+		}
+		if (mds->dc_region[rc].shareable) {
+			dev_err(dev, "DC region %d is shareable\n", rc);
+			rc = -EINVAL;
+			goto out;
+		}
+	}
+
 	cxled->mode = mode;
 	rc = 0;
 out:
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 85b912c11f04..23b4f266a83a 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -205,11 +205,11 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
 	enum cxl_decoder_mode mode;
 	ssize_t rc;
 
-	if (sysfs_streq(buf, "pmem"))
-		mode = CXL_DECODER_PMEM;
-	else if (sysfs_streq(buf, "ram"))
-		mode = CXL_DECODER_RAM;
-	else
+	for (mode = CXL_DECODER_RAM; mode < CXL_DECODER_MIXED; mode++)
+		if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
+			break;
+
+	if (mode >= CXL_DECODER_MIXED)
 		return -EINVAL;
 
 	rc = cxl_dpa_set_mode(cxled, mode);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 8b7099c38a40..cbaacbe0f36d 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -365,6 +365,9 @@ struct cxl_decoder {
 /*
  * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
  * while cxld_unregister() is running
+ *
+ * NOTE: CXL_DECODER_RAM must be second and CXL_DECODER_MIXED must be last.
+ *	 See mode_store()
  */
 enum cxl_decoder_mode {
 	CXL_DECODER_NONE,
@@ -382,25 +385,25 @@ enum cxl_decoder_mode {
 	CXL_DECODER_DEAD,
 };
 
+static const char * const cxl_decoder_mode_names[] = {
+	[CXL_DECODER_NONE] = "none",
+	[CXL_DECODER_RAM] = "ram",
+	[CXL_DECODER_PMEM] = "pmem",
+	[CXL_DECODER_DC0] = "dc0",
+	[CXL_DECODER_DC1] = "dc1",
+	[CXL_DECODER_DC2] = "dc2",
+	[CXL_DECODER_DC3] = "dc3",
+	[CXL_DECODER_DC4] = "dc4",
+	[CXL_DECODER_DC5] = "dc5",
+	[CXL_DECODER_DC6] = "dc6",
+	[CXL_DECODER_DC7] = "dc7",
+	[CXL_DECODER_MIXED] = "mixed",
+};
+
 static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
 {
-	static const char * const names[] = {
-		[CXL_DECODER_NONE] = "none",
-		[CXL_DECODER_RAM] = "ram",
-		[CXL_DECODER_PMEM] = "pmem",
-		[CXL_DECODER_DC0] = "dc0",
-		[CXL_DECODER_DC1] = "dc1",
-		[CXL_DECODER_DC2] = "dc2",
-		[CXL_DECODER_DC3] = "dc3",
-		[CXL_DECODER_DC4] = "dc4",
-		[CXL_DECODER_DC5] = "dc5",
-		[CXL_DECODER_DC6] = "dc6",
-		[CXL_DECODER_DC7] = "dc7",
-		[CXL_DECODER_MIXED] = "mixed",
-	};
-
 	if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
-		return names[mode];
+		return cxl_decoder_mode_names[mode];
 	return "mixed";
 }
 

-- 
2.46.0
Re: [PATCH v4 14/28] cxl/port: Add endpoint decoder DC mode support to sysfs
Posted by Jonathan Cameron 1 month, 2 weeks ago
On Mon, 07 Oct 2024 18:16:20 -0500
ira.weiny@intel.com wrote:

> From: Navneet Singh <navneet.singh@intel.com>
> 
> Endpoint decoder mode is used to represent the partition the decoder
> points to such as ram or pmem.
> 
> Expand the mode to allow a decoder to point to a specific DC partition
> (Region).
> 
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>

A few comments inline about ways that can make this a little tidier
and less fragile.


Jonathan

> 
> ---
> Changes:
> [iweiny: prevent creation of region on shareable DC partitions]
> [Fan: change mode range logic]
> [Fan: use !resource_size()]
> [djiang: use the static mode name string array in mode_store()]
> [Jonathan: remove rc check from mode to region index]
> [Jonathan: clarify decoder mode 'mixed']
> [djbw: drop cleanup patch and just follow the convention in cxl_dpa_set_mode()]
> [fan: make dcd resource size check similar to other partitions]
> [djbw, jonathan, fan: remove mode range check from dc_mode_to_region_index]
> [iweiny: push sysfs versions to 6.12]
> ---
>  Documentation/ABI/testing/sysfs-bus-cxl | 21 ++++++++++----------
>  drivers/cxl/core/hdm.c                  | 17 ++++++++++++++++
>  drivers/cxl/core/port.c                 | 10 +++++-----
>  drivers/cxl/cxl.h                       | 35 ++++++++++++++++++---------------
>  4 files changed, 52 insertions(+), 31 deletions(-)
> 
> diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> index b865eefdb74c..661dab99183f 100644
> --- a/Documentation/ABI/testing/sysfs-bus-cxl
> +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> @@ -361,23 +361,24 @@ Description:
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/mode
> -Date:		May, 2022
> -KernelVersion:	v6.0
> +Date:		May, 2022, October 2024
> +KernelVersion:	v6.0, v6.12 (dcY)
>  Contact:	linux-cxl@vger.kernel.org
>  Description:
>  		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
>  		translates from a host physical address range, to a device local
>  		address range. Device-local address ranges are further split
> -		into a 'ram' (volatile memory) range and 'pmem' (persistent
> -		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> -		'mixed', or 'none'. The 'mixed' indication is for error cases
> -		when a decoder straddles the volatile/persistent partition
> -		boundary, and 'none' indicates the decoder is not actively
> -		decoding, or no DPA allocation policy has been set.
> +		into a 'ram' (volatile memory) range, 'pmem' (persistent
> +		memory) range, or Dynamic Capacity (DC) range.
		memory) range, and Dynamic Capacity (DC) ranges.

(Otherwise the list doesn't read correctly after the preceding
"are further split into".)

> The 'mode'
> +		attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
> +		'none'. The 'mixed' indication is for error cases when a
> +		decoder straddles partition boundaries, and 'none' indicates
> +		the decoder is not actively decoding, or no DPA allocation
> +		policy has been set.
>  
>  		'mode' can be written, when the decoder is in the 'disabled'
> -		state, with either 'ram' or 'pmem' to set the boundaries for the
> -		next allocation.
> +		state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
> +		the next allocation.
>  
>  
>  What:		/sys/bus/cxl/devices/decoderX.Y/dpa_resource
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 8c7f941eaba1..b368babb55d9 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -551,6 +551,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  	switch (mode) {
>  	case CXL_DECODER_RAM:
>  	case CXL_DECODER_PMEM:
> +	case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
>  		break;
>  	default:
>  		dev_dbg(dev, "unsupported mode: %d\n", mode);
> @@ -578,6 +579,22 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
>  		goto out;
>  	}
>  
> +	if (mode >= CXL_DECODER_DC0 && mode <= CXL_DECODER_DC7) {
> +		struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
> +
> +		rc = dc_mode_to_region_index(mode);
> +		if (!resource_size(&cxlds->dc_res[rc])) {
> +			dev_dbg(dev, "no available dynamic capacity\n");
> +			rc = -ENXIO;
> +			goto out;
Probably worth adding a precursor patch that uses guard(rwsem_write) on
the cxl_dpa_rwsem. That allows early returns, which simplifies both the
existing code and this addition.


> +		}
> +		if (mds->dc_region[rc].shareable) {
> +			dev_err(dev, "DC region %d is shareable\n", rc);
> +			rc = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
>  	cxled->mode = mode;
>  	rc = 0;
>  out:
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 85b912c11f04..23b4f266a83a 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -205,11 +205,11 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
>  	enum cxl_decoder_mode mode;
>  	ssize_t rc;
>  
> -	if (sysfs_streq(buf, "pmem"))
> -		mode = CXL_DECODER_PMEM;
> -	else if (sysfs_streq(buf, "ram"))
> -		mode = CXL_DECODER_RAM;
> -	else
> +	for (mode = CXL_DECODER_RAM; mode < CXL_DECODER_MIXED; mode++)
> +		if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
> +			break;
> +
Loop over all of them, then do what you have here but with explicit
checks that reject the ones that can't be set.
Add a MODE_COUNT entry (CXL_DECODER_MODE_COUNT below) to the end of the
options.

	for (mode = 0; mode < CXL_DECODER_MODE_COUNT; mode++)
		if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
			break;

	if (mode == CXL_DECODER_MODE_COUNT)
		return -EINVAL;

	if (mode == CXL_DECODER_NONE)
		return -EINVAL;

	/* Not yet supported */
	if (mode == CXL_DECODER_MIXED)
		return -EINVAL;
...

> +	if (mode >= CXL_DECODER_MIXED)
>  		return -EINVAL;
>  
>  	rc = cxl_dpa_set_mode(cxled, mode);
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 8b7099c38a40..cbaacbe0f36d 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -365,6 +365,9 @@ struct cxl_decoder {
>  /*
>   * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
>   * while cxld_unregister() is running
> + *
> + * NOTE: CXL_DECODER_RAM must be second and CXL_DECODER_MIXED must be last.
This is a bit ugly. I'd change the logic a bit to avoid it.
The list of things we don't support is short so just check for them.
See above.

> + *	 See mode_store()
>   */
>  enum cxl_decoder_mode {
>  	CXL_DECODER_NONE,
> @@ -382,25 +385,25 @@ enum cxl_decoder_mode {
>  	CXL_DECODER_DEAD,
>  };
>  
> +static const char * const cxl_decoder_mode_names[] = {
> +	[CXL_DECODER_NONE] = "none",
> +	[CXL_DECODER_RAM] = "ram",
> +	[CXL_DECODER_PMEM] = "pmem",
> +	[CXL_DECODER_DC0] = "dc0",
> +	[CXL_DECODER_DC1] = "dc1",
> +	[CXL_DECODER_DC2] = "dc2",
> +	[CXL_DECODER_DC3] = "dc3",
> +	[CXL_DECODER_DC4] = "dc4",
> +	[CXL_DECODER_DC5] = "dc5",
> +	[CXL_DECODER_DC6] = "dc6",
> +	[CXL_DECODER_DC7] = "dc7",
> +	[CXL_DECODER_MIXED] = "mixed",
> +};
> +
>  static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
>  {
> -	static const char * const names[] = {
> -		[CXL_DECODER_NONE] = "none",
> -		[CXL_DECODER_RAM] = "ram",
> -		[CXL_DECODER_PMEM] = "pmem",
> -		[CXL_DECODER_DC0] = "dc0",
> -		[CXL_DECODER_DC1] = "dc1",
> -		[CXL_DECODER_DC2] = "dc2",
> -		[CXL_DECODER_DC3] = "dc3",
> -		[CXL_DECODER_DC4] = "dc4",
> -		[CXL_DECODER_DC5] = "dc5",
> -		[CXL_DECODER_DC6] = "dc6",
> -		[CXL_DECODER_DC7] = "dc7",
> -		[CXL_DECODER_MIXED] = "mixed",
> -	};
> -
>  	if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
> -		return names[mode];
> +		return cxl_decoder_mode_names[mode];
>  	return "mixed";
>  }
>  
>
Re: [PATCH v4 14/28] cxl/port: Add endpoint decoder DC mode support to sysfs
Posted by Ira Weiny 1 month, 1 week ago
Jonathan Cameron wrote:
> On Mon, 07 Oct 2024 18:16:20 -0500
> ira.weiny@intel.com wrote:
> 
> > From: Navneet Singh <navneet.singh@intel.com>
> > 
> > Endpoint decoder mode is used to represent the partition the decoder
> > points to such as ram or pmem.
> > 
> > Expand the mode to allow a decoder to point to a specific DC partition
> > (Region).
> > 
> > Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> > Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
> 
> A few comments inline about ways that can make this a little tidier
> and less fragile.

All Done.  Yea good idea on the enum.

Ira


[snip]

> >  		(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
> >  		translates from a host physical address range, to a device local
> >  		address range. Device-local address ranges are further split
> > -		into a 'ram' (volatile memory) range and 'pmem' (persistent
> > -		memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> > -		'mixed', or 'none'. The 'mixed' indication is for error cases
> > -		when a decoder straddles the volatile/persistent partition
> > -		boundary, and 'none' indicates the decoder is not actively
> > -		decoding, or no DPA allocation policy has been set.
> > +		into a 'ram' (volatile memory) range, 'pmem' (persistent
> > +		memory) range, or Dynamic Capacity (DC) range.
> 		memory) range, and Dynamic Capacity (DC) ranges.
> 
> (Otherwise the list doesn't read correctly after the preceding
> "are further split into".)
> 

[snip]

> > +	if (mode >= CXL_DECODER_DC0 && mode <= CXL_DECODER_DC7) {
> > +		struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
> > +
> > +		rc = dc_mode_to_region_index(mode);
> > +		if (!resource_size(&cxlds->dc_res[rc])) {
> > +			dev_dbg(dev, "no available dynamic capacity\n");
> > +			rc = -ENXIO;
> > +			goto out;
> Probably worth adding a precursor patch that uses guard(rwsem_write) on
> the cxl_dpa_rwsem. That allows early returns, which simplifies both the
> existing code and this addition.

[snip]

> > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> > index 85b912c11f04..23b4f266a83a 100644
> > --- a/drivers/cxl/core/port.c
> > +++ b/drivers/cxl/core/port.c
> > @@ -205,11 +205,11 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
> >  	enum cxl_decoder_mode mode;
> >  	ssize_t rc;
> >  
> > -	if (sysfs_streq(buf, "pmem"))
> > -		mode = CXL_DECODER_PMEM;
> > -	else if (sysfs_streq(buf, "ram"))
> > -		mode = CXL_DECODER_RAM;
> > -	else
> > +	for (mode = CXL_DECODER_RAM; mode < CXL_DECODER_MIXED; mode++)
> > +		if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
> > +			break;
> > +
> Loop over all of them, then do what you have here but with explicit
> checks that reject the ones that can't be set.
> Add a MODE_COUNT entry (CXL_DECODER_MODE_COUNT below) to the end of the
> options.
> 
> 	for (mode = 0; mode < CXL_DECODER_MODE_COUNT; mode++)
> 		if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
> 			break;
> 
> 	if (mode == CXL_DECODER_MODE_COUNT)
> 		return -EINVAL;
> 
> 	if (mode == CXL_DECODER_NONE)
> 		return -EINVAL;
> 
> 	/* Not yet supported */
> 	if (mode == CXL_DECODER_MIXED)
> 		return -EINVAL;
> ...
> 
> > +	if (mode >= CXL_DECODER_MIXED)
> >  		return -EINVAL;
> >  
> >  	rc = cxl_dpa_set_mode(cxled, mode);
> > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > index 8b7099c38a40..cbaacbe0f36d 100644
> > --- a/drivers/cxl/cxl.h
> > +++ b/drivers/cxl/cxl.h
> > @@ -365,6 +365,9 @@ struct cxl_decoder {
> >  /*
> >   * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
> >   * while cxld_unregister() is running
> > + *
> > + * NOTE: CXL_DECODER_RAM must be second and CXL_DECODER_MIXED must be last.
> This is a bit ugly. I'd change the logic a bit to avoid it.
> The list of things we don't support is short so just check for them.
> See above.
> 

[snip]