From: Navneet Singh <navneet.singh@intel.com>
Endpoint decoder mode is used to represent the partition the decoder
points to such as ram or pmem.
Expand the mode to allow a decoder to point to a specific DC partition
(Region).
Signed-off-by: Navneet Singh <navneet.singh@intel.com>
Co-developed-by: Ira Weiny <ira.weiny@intel.com>
Signed-off-by: Ira Weiny <ira.weiny@intel.com>
---
Changes:
[iweiny: prevent creation of region on shareable DC partitions]
[Fan: change mode range logic]
[Fan: use !resource_size()]
[djiang: use the static mode name string array in mode_store()]
[Jonathan: remove rc check from mode to region index]
[Jonathan: clarify decoder mode 'mixed']
[djbw: drop cleanup patch and just follow the convention in cxl_dpa_set_mode()]
[fan: make dcd resource size check similar to other partitions]
[djbw, jonathan, fan: remove mode range check from dc_mode_to_region_index]
[iweiny: push sysfs versions to 6.12]
---
Documentation/ABI/testing/sysfs-bus-cxl | 21 ++++++++++----------
drivers/cxl/core/hdm.c | 17 ++++++++++++++++
drivers/cxl/core/port.c | 10 +++++-----
drivers/cxl/cxl.h | 35 ++++++++++++++++++---------------
4 files changed, 52 insertions(+), 31 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
index b865eefdb74c..661dab99183f 100644
--- a/Documentation/ABI/testing/sysfs-bus-cxl
+++ b/Documentation/ABI/testing/sysfs-bus-cxl
@@ -361,23 +361,24 @@ Description:
What: /sys/bus/cxl/devices/decoderX.Y/mode
-Date: May, 2022
-KernelVersion: v6.0
+Date: May, 2022, October 2024
+KernelVersion: v6.0, v6.12 (dcY)
Contact: linux-cxl@vger.kernel.org
Description:
(RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
translates from a host physical address range, to a device local
address range. Device-local address ranges are further split
- into a 'ram' (volatile memory) range and 'pmem' (persistent
- memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
- 'mixed', or 'none'. The 'mixed' indication is for error cases
- when a decoder straddles the volatile/persistent partition
- boundary, and 'none' indicates the decoder is not actively
- decoding, or no DPA allocation policy has been set.
+ into a 'ram' (volatile memory) range, 'pmem' (persistent
+ memory) range, or Dynamic Capacity (DC) range. The 'mode'
+ attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
+ 'none'. The 'mixed' indication is for error cases when a
+ decoder straddles partition boundaries, and 'none' indicates
+ the decoder is not actively decoding, or no DPA allocation
+ policy has been set.
'mode' can be written, when the decoder is in the 'disabled'
- state, with either 'ram' or 'pmem' to set the boundaries for the
- next allocation.
+ state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
+ the next allocation.
What: /sys/bus/cxl/devices/decoderX.Y/dpa_resource
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 8c7f941eaba1..b368babb55d9 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -551,6 +551,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
switch (mode) {
case CXL_DECODER_RAM:
case CXL_DECODER_PMEM:
+ case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
break;
default:
dev_dbg(dev, "unsupported mode: %d\n", mode);
@@ -578,6 +579,22 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
goto out;
}
+ if (mode >= CXL_DECODER_DC0 && mode <= CXL_DECODER_DC7) {
+ struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
+
+ rc = dc_mode_to_region_index(mode);
+ if (!resource_size(&cxlds->dc_res[rc])) {
+ dev_dbg(dev, "no available dynamic capacity\n");
+ rc = -ENXIO;
+ goto out;
+ }
+ if (mds->dc_region[rc].shareable) {
+ dev_err(dev, "DC region %d is shareable\n", rc);
+ rc = -EINVAL;
+ goto out;
+ }
+ }
+
cxled->mode = mode;
rc = 0;
out:
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 85b912c11f04..23b4f266a83a 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -205,11 +205,11 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
enum cxl_decoder_mode mode;
ssize_t rc;
- if (sysfs_streq(buf, "pmem"))
- mode = CXL_DECODER_PMEM;
- else if (sysfs_streq(buf, "ram"))
- mode = CXL_DECODER_RAM;
- else
+ for (mode = CXL_DECODER_RAM; mode < CXL_DECODER_MIXED; mode++)
+ if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
+ break;
+
+ if (mode >= CXL_DECODER_MIXED)
return -EINVAL;
rc = cxl_dpa_set_mode(cxled, mode);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index 8b7099c38a40..cbaacbe0f36d 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -365,6 +365,9 @@ struct cxl_decoder {
/*
* CXL_DECODER_DEAD prevents endpoints from being reattached to regions
* while cxld_unregister() is running
+ *
+ * NOTE: CXL_DECODER_RAM must be second and CXL_DECODER_MIXED must directly
+ * follow the last user-settable mode. See mode_store()
*/
enum cxl_decoder_mode {
CXL_DECODER_NONE,
@@ -382,25 +385,25 @@ enum cxl_decoder_mode {
CXL_DECODER_DEAD,
};
+static const char * const cxl_decoder_mode_names[] = {
+ [CXL_DECODER_NONE] = "none",
+ [CXL_DECODER_RAM] = "ram",
+ [CXL_DECODER_PMEM] = "pmem",
+ [CXL_DECODER_DC0] = "dc0",
+ [CXL_DECODER_DC1] = "dc1",
+ [CXL_DECODER_DC2] = "dc2",
+ [CXL_DECODER_DC3] = "dc3",
+ [CXL_DECODER_DC4] = "dc4",
+ [CXL_DECODER_DC5] = "dc5",
+ [CXL_DECODER_DC6] = "dc6",
+ [CXL_DECODER_DC7] = "dc7",
+ [CXL_DECODER_MIXED] = "mixed",
+};
+
static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
{
- static const char * const names[] = {
- [CXL_DECODER_NONE] = "none",
- [CXL_DECODER_RAM] = "ram",
- [CXL_DECODER_PMEM] = "pmem",
- [CXL_DECODER_DC0] = "dc0",
- [CXL_DECODER_DC1] = "dc1",
- [CXL_DECODER_DC2] = "dc2",
- [CXL_DECODER_DC3] = "dc3",
- [CXL_DECODER_DC4] = "dc4",
- [CXL_DECODER_DC5] = "dc5",
- [CXL_DECODER_DC6] = "dc6",
- [CXL_DECODER_DC7] = "dc7",
- [CXL_DECODER_MIXED] = "mixed",
- };
-
if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
- return names[mode];
+ return cxl_decoder_mode_names[mode];
return "mixed";
}
--
2.46.0
On Mon, 07 Oct 2024 18:16:20 -0500
ira.weiny@intel.com wrote:
> From: Navneet Singh <navneet.singh@intel.com>
>
> Endpoint decoder mode is used to represent the partition the decoder
> points to such as ram or pmem.
>
> Expand the mode to allow a decoder to point to a specific DC partition
> (Region).
>
> Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> Signed-off-by: Ira Weiny <ira.weiny@intel.com>
A few comments inline about ways that can make this a little tidier
and less fragile.
Jonathan
>
> ---
> Changes:
> [iweiny: prevent creation of region on shareable DC partitions]
> [Fan: change mode range logic]
> [Fan: use !resource_size()]
> [djiang: use the static mode name string array in mode_store()]
> [Jonathan: remove rc check from mode to region index]
> [Jonathan: clarify decoder mode 'mixed']
> [djbw: drop cleanup patch and just follow the convention in cxl_dpa_set_mode()]
> [fan: make dcd resource size check similar to other partitions]
> [djbw, jonathan, fan: remove mode range check from dc_mode_to_region_index]
> [iweiny: push sysfs versions to 6.12]
> ---
> Documentation/ABI/testing/sysfs-bus-cxl | 21 ++++++++++----------
> drivers/cxl/core/hdm.c | 17 ++++++++++++++++
> drivers/cxl/core/port.c | 10 +++++-----
> drivers/cxl/cxl.h | 35 ++++++++++++++++++---------------
> 4 files changed, 52 insertions(+), 31 deletions(-)
>
> diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
> index b865eefdb74c..661dab99183f 100644
> --- a/Documentation/ABI/testing/sysfs-bus-cxl
> +++ b/Documentation/ABI/testing/sysfs-bus-cxl
> @@ -361,23 +361,24 @@ Description:
>
>
> What: /sys/bus/cxl/devices/decoderX.Y/mode
> -Date: May, 2022
> -KernelVersion: v6.0
> +Date: May, 2022, October 2024
> +KernelVersion: v6.0, v6.12 (dcY)
> Contact: linux-cxl@vger.kernel.org
> Description:
> (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
> translates from a host physical address range, to a device local
> address range. Device-local address ranges are further split
> - into a 'ram' (volatile memory) range and 'pmem' (persistent
> - memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> - 'mixed', or 'none'. The 'mixed' indication is for error cases
> - when a decoder straddles the volatile/persistent partition
> - boundary, and 'none' indicates the decoder is not actively
> - decoding, or no DPA allocation policy has been set.
> + into a 'ram' (volatile memory) range, 'pmem' (persistent
> + memory) range, or Dynamic Capacity (DC) range.
memory) range, and Dynamic Capacity (DC) ranges.
(otherwise the sentence doesn't read correctly after the preceding
"further split into" text)
> The 'mode'
> + attribute emits one of 'ram', 'pmem', 'dcY', 'mixed', or
> + 'none'. The 'mixed' indication is for error cases when a
> + decoder straddles partition boundaries, and 'none' indicates
> + the decoder is not actively decoding, or no DPA allocation
> + policy has been set.
>
> 'mode' can be written, when the decoder is in the 'disabled'
> - state, with either 'ram' or 'pmem' to set the boundaries for the
> - next allocation.
> + state, with 'ram', 'pmem', or 'dcY' to set the boundaries for
> + the next allocation.
>
>
> What: /sys/bus/cxl/devices/decoderX.Y/dpa_resource
> diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
> index 8c7f941eaba1..b368babb55d9 100644
> --- a/drivers/cxl/core/hdm.c
> +++ b/drivers/cxl/core/hdm.c
> @@ -551,6 +551,7 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
> switch (mode) {
> case CXL_DECODER_RAM:
> case CXL_DECODER_PMEM:
> + case CXL_DECODER_DC0 ... CXL_DECODER_DC7:
> break;
> default:
> dev_dbg(dev, "unsupported mode: %d\n", mode);
> @@ -578,6 +579,22 @@ int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
> goto out;
> }
>
> + if (mode >= CXL_DECODER_DC0 && mode <= CXL_DECODER_DC7) {
> + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
> +
> + rc = dc_mode_to_region_index(mode);
> + if (!resource_size(&cxlds->dc_res[rc])) {
> + dev_dbg(dev, "no available dynamic capacity\n");
> + rc = -ENXIO;
> + goto out;
Probably worth adding a precursor patch that uses guard(rwsem_write) on
the cxl_dpa_rwsem.
That allows for early returns, simplifying both the existing code and this
new check.
> + }
> + if (mds->dc_region[rc].shareable) {
> + dev_err(dev, "DC region %d is shareable\n", rc);
> + rc = -EINVAL;
> + goto out;
> + }
> + }
> +
> cxled->mode = mode;
> rc = 0;
> out:
> diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> index 85b912c11f04..23b4f266a83a 100644
> --- a/drivers/cxl/core/port.c
> +++ b/drivers/cxl/core/port.c
> @@ -205,11 +205,11 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
> enum cxl_decoder_mode mode;
> ssize_t rc;
>
> - if (sysfs_streq(buf, "pmem"))
> - mode = CXL_DECODER_PMEM;
> - else if (sysfs_streq(buf, "ram"))
> - mode = CXL_DECODER_RAM;
> - else
> + for (mode = CXL_DECODER_RAM; mode < CXL_DECODER_MIXED; mode++)
> + if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
> + break;
> +
Loop over all of them, then do what you have here, but with explicit
matches to reject the ones that can't be set.
Add a CXL_DECODER_MODE_COUNT entry to the end of the options.
for (mode = 0; mode < CXL_DECODER_MODE_COUNT; mode++)
if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
break;
if (mode == CXL_DECODER_MODE_COUNT)
return -EINVAL;
if (mode == CXL_DECODER_NONE)
return -EINVAL;
/* Not yet supported */
if (mode == CXL_DECODER_MIXED)
return -EINVAL;
...
> + if (mode >= CXL_DECODER_MIXED)
> return -EINVAL;
>
> rc = cxl_dpa_set_mode(cxled, mode);
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index 8b7099c38a40..cbaacbe0f36d 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -365,6 +365,9 @@ struct cxl_decoder {
> /*
> * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
> * while cxld_unregister() is running
> + *
> + * NOTE: CXL_DECODER_RAM must be second and CXL_DECODER_MIXED must be last.
This is a bit ugly. I'd change the logic a bit to avoid it.
The list of things we don't support is short so just check for them.
See above.
> + * See mode_store()
> */
> enum cxl_decoder_mode {
> CXL_DECODER_NONE,
> @@ -382,25 +385,25 @@ enum cxl_decoder_mode {
> CXL_DECODER_DEAD,
> };
>
> +static const char * const cxl_decoder_mode_names[] = {
> + [CXL_DECODER_NONE] = "none",
> + [CXL_DECODER_RAM] = "ram",
> + [CXL_DECODER_PMEM] = "pmem",
> + [CXL_DECODER_DC0] = "dc0",
> + [CXL_DECODER_DC1] = "dc1",
> + [CXL_DECODER_DC2] = "dc2",
> + [CXL_DECODER_DC3] = "dc3",
> + [CXL_DECODER_DC4] = "dc4",
> + [CXL_DECODER_DC5] = "dc5",
> + [CXL_DECODER_DC6] = "dc6",
> + [CXL_DECODER_DC7] = "dc7",
> + [CXL_DECODER_MIXED] = "mixed",
> +};
> +
> static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
> {
> - static const char * const names[] = {
> - [CXL_DECODER_NONE] = "none",
> - [CXL_DECODER_RAM] = "ram",
> - [CXL_DECODER_PMEM] = "pmem",
> - [CXL_DECODER_DC0] = "dc0",
> - [CXL_DECODER_DC1] = "dc1",
> - [CXL_DECODER_DC2] = "dc2",
> - [CXL_DECODER_DC3] = "dc3",
> - [CXL_DECODER_DC4] = "dc4",
> - [CXL_DECODER_DC5] = "dc5",
> - [CXL_DECODER_DC6] = "dc6",
> - [CXL_DECODER_DC7] = "dc7",
> - [CXL_DECODER_MIXED] = "mixed",
> - };
> -
> if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
> - return names[mode];
> + return cxl_decoder_mode_names[mode];
> return "mixed";
> }
>
>
Jonathan Cameron wrote:
> On Mon, 07 Oct 2024 18:16:20 -0500
> ira.weiny@intel.com wrote:
>
> > From: Navneet Singh <navneet.singh@intel.com>
> >
> > Endpoint decoder mode is used to represent the partition the decoder
> > points to such as ram or pmem.
> >
> > Expand the mode to allow a decoder to point to a specific DC partition
> > (Region).
> >
> > Signed-off-by: Navneet Singh <navneet.singh@intel.com>
> > Co-developed-by: Ira Weiny <ira.weiny@intel.com>
> > Signed-off-by: Ira Weiny <ira.weiny@intel.com>
>
> A few comments inline about ways that can make this a little tidier
> and less fragile.
All done. Yeah, good idea on the enum.
Ira
[snip]
> > (RW) When a CXL decoder is of devtype "cxl_decoder_endpoint" it
> > translates from a host physical address range, to a device local
> > address range. Device-local address ranges are further split
> > - into a 'ram' (volatile memory) range and 'pmem' (persistent
> > - memory) range. The 'mode' attribute emits one of 'ram', 'pmem',
> > - 'mixed', or 'none'. The 'mixed' indication is for error cases
> > - when a decoder straddles the volatile/persistent partition
> > - boundary, and 'none' indicates the decoder is not actively
> > - decoding, or no DPA allocation policy has been set.
> > + into a 'ram' (volatile memory) range, 'pmem' (persistent
> > + memory) range, or Dynamic Capacity (DC) range.
> memory) range, and Dynamic Capacity (DC) ranges.
>
> (doesn't work with preceding text otherwise)
>
[snip]
> > + if (mode >= CXL_DECODER_DC0 && mode <= CXL_DECODER_DC7) {
> > + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
> > +
> > + rc = dc_mode_to_region_index(mode);
> > + if (!resource_size(&cxlds->dc_res[rc])) {
> > + dev_dbg(dev, "no available dynamic capacity\n");
> > + rc = -ENXIO;
> > + goto out;
> Probably worth adding a precursor patch that uses guard(rwsem_write) on
> the cxl_dpa_rwsem
> Allows for early returns simplifying existing code and this.
[snip]
> > diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
> > index 85b912c11f04..23b4f266a83a 100644
> > --- a/drivers/cxl/core/port.c
> > +++ b/drivers/cxl/core/port.c
> > @@ -205,11 +205,11 @@ static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
> > enum cxl_decoder_mode mode;
> > ssize_t rc;
> >
> > - if (sysfs_streq(buf, "pmem"))
> > - mode = CXL_DECODER_PMEM;
> > - else if (sysfs_streq(buf, "ram"))
> > - mode = CXL_DECODER_RAM;
> > - else
> > + for (mode = CXL_DECODER_RAM; mode < CXL_DECODER_MIXED; mode++)
> > + if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
> > + break;
> > +
> Loop over them all then do what you have here but explicit matches
> to reject the ones that can't be set.
> Add a MODE_COUNT to the end of the options.
>
> for (mode = 0; mode < CXL_DECODER_MODE_COUNT; mode++)
> if (sysfs_streq(buf, cxl_decoder_mode_names[mode]))
> break;
>
> if (mode == CXL_DECODER_MODE_COUNT)
> return -EINVAL;
>
> if (mode == CXL_DECODER_NONE)
> return -EINVAL;
>
> /* Not yet supported */
> if (mode == CXL_DECODER_MIXED)
> return -EINVAL;
> ...
>
> > + if (mode >= CXL_DECODER_MIXED)
> > return -EINVAL;
> >
> > rc = cxl_dpa_set_mode(cxled, mode);
> > diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> > index 8b7099c38a40..cbaacbe0f36d 100644
> > --- a/drivers/cxl/cxl.h
> > +++ b/drivers/cxl/cxl.h
> > @@ -365,6 +365,9 @@ struct cxl_decoder {
> > /*
> > * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
> > * while cxld_unregister() is running
> > + *
> > + * NOTE: CXL_DECODER_RAM must be second and CXL_DECODER_MIXED must be last.
> This is a bit ugly. I'd change the logic a bit to avoid it.
> The list of things we don't support is short so just check for them.
> See above.
>
[snip]
© 2016 - 2026 Red Hat, Inc.