[PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl

Gregory Price posted 6 patches 4 weeks ago
[PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Gregory Price 4 weeks ago
The CXL driver presently hands policy management over to DAX subsystem
for sysram regions, which makes building policy around the entire region
clunky and at time difficult (e.g. multiple actions to offline and
hot-unplug memory reliably).

To support multiple backend controllers for memory regions (for example
dax vs direct hotplug), implement a memctrl field in cxl_region allows
switching uncomitted regions between different "memory controllers".

CXL_CONTROL_NONE: No selected controller, probe will fail.
CXL_CONTROL_AUTO: If memory is already online as SysRAM, no controller
                  otherwise register a dax_region
CXL_CONTROL_DAX : register a dax_region

Auto regions will either be static sysram (BIOS-onlined) and has no
region controller associated with it - or if the SP bit was set a
DAX device will be created.

Rather than default all regions to the auto-controller, only default
auto-regions to the auto controller.

Non-auto regions will be defaulted to CXL_CONTROL_NONE, which will cause
a failure to probe unless a controller is selected.

Signed-off-by: Gregory Price <gourry@gourry.net>
---
 drivers/cxl/core/Makefile             |   1 +
 drivers/cxl/core/core.h               |   2 +
 drivers/cxl/core/memctrl/Makefile     |   4 +
 drivers/cxl/core/memctrl/dax_region.c |  79 +++++++++++++++
 drivers/cxl/core/memctrl/memctrl.c    |  42 ++++++++
 drivers/cxl/core/region.c             | 136 ++++++++++----------------
 drivers/cxl/cxl.h                     |  14 +++
 7 files changed, 192 insertions(+), 86 deletions(-)
 create mode 100644 drivers/cxl/core/memctrl/Makefile
 create mode 100644 drivers/cxl/core/memctrl/dax_region.c
 create mode 100644 drivers/cxl/core/memctrl/memctrl.c

diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
index 5ad8fef210b5..79de20e3f8aa 100644
--- a/drivers/cxl/core/Makefile
+++ b/drivers/cxl/core/Makefile
@@ -17,6 +17,7 @@ cxl_core-y += cdat.o
 cxl_core-y += ras.o
 cxl_core-$(CONFIG_TRACING) += trace.o
 cxl_core-$(CONFIG_CXL_REGION) += region.o
+include $(src)/memctrl/Makefile
 cxl_core-$(CONFIG_CXL_MCE) += mce.o
 cxl_core-$(CONFIG_CXL_FEATURES) += features.o
 cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index 1fb66132b777..1156a4bd0080 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -42,6 +42,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
 struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
 u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
 		   u64 dpa);
+int cxl_enable_memctrl(struct cxl_region *cxlr);
+int devm_cxl_add_dax_region(struct cxl_region *cxlr);
 
 #else
 static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
diff --git a/drivers/cxl/core/memctrl/Makefile b/drivers/cxl/core/memctrl/Makefile
new file mode 100644
index 000000000000..8165aad5a52a
--- /dev/null
+++ b/drivers/cxl/core/memctrl/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+
+cxl_core-$(CONFIG_CXL_REGION) += memctrl/memctrl.o
+cxl_core-$(CONFIG_CXL_REGION) += memctrl/dax_region.o
diff --git a/drivers/cxl/core/memctrl/dax_region.c b/drivers/cxl/core/memctrl/dax_region.c
new file mode 100644
index 000000000000..90d7fdb97013
--- /dev/null
+++ b/drivers/cxl/core/memctrl/dax_region.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "../core.h"
+
+static struct lock_class_key cxl_dax_region_key;
+
+static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
+{
+	struct cxl_region_params *p = &cxlr->params;
+	struct cxl_dax_region *cxlr_dax;
+	struct device *dev;
+
+	guard(rwsem_read)(&cxl_rwsem.region);
+	if (p->state != CXL_CONFIG_COMMIT)
+		return ERR_PTR(-ENXIO);
+
+	cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
+	if (!cxlr_dax)
+		return ERR_PTR(-ENOMEM);
+
+	cxlr_dax->hpa_range.start = p->res->start;
+	cxlr_dax->hpa_range.end = p->res->end;
+
+	dev = &cxlr_dax->dev;
+	cxlr_dax->cxlr = cxlr;
+	device_initialize(dev);
+	lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
+	device_set_pm_not_required(dev);
+	dev->parent = &cxlr->dev;
+	dev->bus = &cxl_bus_type;
+	dev->type = &cxl_dax_region_type;
+
+	return cxlr_dax;
+}
+
+static void cxlr_dax_unregister(void *_cxlr_dax)
+{
+	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
+
+	device_unregister(&cxlr_dax->dev);
+}
+
+/*
+ * The dax controller is the default controller and simply hands the
+ * control pattern over to the dax driver.  It does with a dax_region
+ * built by dax/cxl.c
+ */
+int devm_cxl_add_dax_region(struct cxl_region *cxlr)
+{
+	struct cxl_dax_region *cxlr_dax;
+	struct device *dev;
+	int rc;
+
+	cxlr_dax = cxl_dax_region_alloc(cxlr);
+	if (IS_ERR(cxlr_dax))
+		return PTR_ERR(cxlr_dax);
+
+	dev = &cxlr_dax->dev;
+	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
+	if (rc)
+		goto err;
+
+	rc = device_add(dev);
+	if (rc)
+		goto err;
+
+	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
+		dev_name(dev));
+
+	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
+					cxlr_dax);
+err:
+	put_device(dev);
+	return rc;
+}
diff --git a/drivers/cxl/core/memctrl/memctrl.c b/drivers/cxl/core/memctrl/memctrl.c
new file mode 100644
index 000000000000..24e0e14b39c7
--- /dev/null
+++ b/drivers/cxl/core/memctrl/memctrl.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
+/* Copyright(c) 2026 Meta Inc. All rights reserved. */
+#include <linux/device.h>
+#include <linux/ioport.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "../core.h"
+
+static int is_system_ram(struct resource *res, void *arg)
+{
+	struct cxl_region *cxlr = arg;
+	struct cxl_region_params *p = &cxlr->params;
+
+	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
+	return 1;
+}
+
+int cxl_enable_memctrl(struct cxl_region *cxlr)
+{
+	struct cxl_region_params *p = &cxlr->params;
+
+	switch (cxlr->memctrl) {
+	case CXL_MEMCTRL_AUTO:
+		/*
+		 * The region can not be manged by CXL if any portion of
+		 * it is already online as 'System RAM'
+		 */
+		if (walk_iomem_res_desc(IORES_DESC_NONE,
+					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
+					p->res->start, p->res->end, cxlr,
+					is_system_ram) > 0)
+			return 0;
+		return devm_cxl_add_dax_region(cxlr);
+	case CXL_MEMCTRL_DAX:
+		return devm_cxl_add_dax_region(cxlr);
+	default:
+		return -EINVAL;
+	}
+}
+
+
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index ae899f68551f..02d7d9ae0252 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -626,6 +626,50 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
 }
 static DEVICE_ATTR_RO(mode);
 
+static ssize_t ctrl_show(struct device *dev, struct device_attribute *attr,
+			 char *buf)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	const char *desc;
+
+	switch (cxlr->memctrl) {
+	case CXL_MEMCTRL_AUTO:
+		desc = "auto";
+		break;
+	case CXL_MEMCTRL_DAX:
+		desc = "dax";
+		break;
+	default:
+		desc = "";
+		break;
+	}
+
+	return sysfs_emit(buf, "%s\n", desc);
+}
+
+static ssize_t ctrl_store(struct device *dev, struct device_attribute *attr,
+			  const char *buf, size_t len)
+{
+	struct cxl_region *cxlr = to_cxl_region(dev);
+	struct cxl_region_params *p = &cxlr->params;
+	int rc;
+
+	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
+	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
+		return rc;
+
+	if (p->state >= CXL_CONFIG_COMMIT)
+		return -EBUSY;
+
+	if (sysfs_streq(buf, "dax"))
+		cxlr->memctrl = CXL_MEMCTRL_DAX;
+	else
+		return -EINVAL;
+
+	return len;
+}
+static DEVICE_ATTR_RW(ctrl);
+
 static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
 {
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
@@ -772,6 +816,7 @@ static struct attribute *cxl_region_attrs[] = {
 	&dev_attr_size.attr,
 	&dev_attr_mode.attr,
 	&dev_attr_extended_linear_cache_size.attr,
+	&dev_attr_ctrl.attr,
 	NULL,
 };
 
@@ -2598,6 +2643,7 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 		return cxlr;
 	cxlr->mode = mode;
 	cxlr->type = type;
+	cxlr->memctrl = CXL_MEMCTRL_NONE;
 
 	dev = &cxlr->dev;
 	rc = dev_set_name(dev, "region%d", id);
@@ -3307,37 +3353,6 @@ struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
 }
 EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, "CXL");
 
-static struct lock_class_key cxl_dax_region_key;
-
-static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
-{
-	struct cxl_region_params *p = &cxlr->params;
-	struct cxl_dax_region *cxlr_dax;
-	struct device *dev;
-
-	guard(rwsem_read)(&cxl_rwsem.region);
-	if (p->state != CXL_CONFIG_COMMIT)
-		return ERR_PTR(-ENXIO);
-
-	cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
-	if (!cxlr_dax)
-		return ERR_PTR(-ENOMEM);
-
-	cxlr_dax->hpa_range.start = p->res->start;
-	cxlr_dax->hpa_range.end = p->res->end;
-
-	dev = &cxlr_dax->dev;
-	cxlr_dax->cxlr = cxlr;
-	device_initialize(dev);
-	lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
-	device_set_pm_not_required(dev);
-	dev->parent = &cxlr->dev;
-	dev->bus = &cxl_bus_type;
-	dev->type = &cxl_dax_region_type;
-
-	return cxlr_dax;
-}
-
 static void cxlr_pmem_unregister(void *_cxlr_pmem)
 {
 	struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
@@ -3424,42 +3439,6 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
 	return rc;
 }
 
-static void cxlr_dax_unregister(void *_cxlr_dax)
-{
-	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
-
-	device_unregister(&cxlr_dax->dev);
-}
-
-static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
-{
-	struct cxl_dax_region *cxlr_dax;
-	struct device *dev;
-	int rc;
-
-	cxlr_dax = cxl_dax_region_alloc(cxlr);
-	if (IS_ERR(cxlr_dax))
-		return PTR_ERR(cxlr_dax);
-
-	dev = &cxlr_dax->dev;
-	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
-	if (rc)
-		goto err;
-
-	rc = device_add(dev);
-	if (rc)
-		goto err;
-
-	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
-		dev_name(dev));
-
-	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
-					cxlr_dax);
-err:
-	put_device(dev);
-	return rc;
-}
-
 static int match_decoder_by_range(struct device *dev, const void *data)
 {
 	const struct range *r1, *r2 = data;
@@ -3579,6 +3558,9 @@ static int __construct_region(struct cxl_region *cxlr,
 
 	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
 
+	/* Auto-regions will either be static sysram (onlined by BIOS) or DAX */
+	cxlr->memctrl = CXL_MEMCTRL_AUTO;
+
 	res = kmalloc(sizeof(*res), GFP_KERNEL);
 	if (!res)
 		return -ENOMEM;
@@ -3755,15 +3737,6 @@ u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
 
-static int is_system_ram(struct resource *res, void *arg)
-{
-	struct cxl_region *cxlr = arg;
-	struct cxl_region_params *p = &cxlr->params;
-
-	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
-	return 1;
-}
-
 static void shutdown_notifiers(void *_cxlr)
 {
 	struct cxl_region *cxlr = _cxlr;
@@ -3965,16 +3938,7 @@ static int cxl_region_probe(struct device *dev)
 			dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
 				cxlr->id);
 
-		/*
-		 * The region can not be manged by CXL if any portion of
-		 * it is already online as 'System RAM'
-		 */
-		if (walk_iomem_res_desc(IORES_DESC_NONE,
-					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
-					p->res->start, p->res->end, cxlr,
-					is_system_ram) > 0)
-			return 0;
-		return devm_cxl_add_dax_region(cxlr);
+		return cxl_enable_memctrl(cxlr);
 	default:
 		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
 			cxlr->mode);
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index ba17fa86d249..b8fabaa77262 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -502,6 +502,19 @@ enum cxl_partition_mode {
 	CXL_PARTMODE_PMEM,
 };
 
+
+/*
+ * Memory Controller modes:
+ *   None - No controller selected
+ *   Auto - either BIOS-configured as SysRAM, or default to DAX
+ *   DAX  - creates a dax_region controller for the cxl_region
+ */
+enum cxl_memctrl_mode {
+	CXL_MEMCTRL_NONE,
+	CXL_MEMCTRL_AUTO,
+	CXL_MEMCTRL_DAX,
+};
+
 /*
  * Indicate whether this region has been assembled by autodetection or
  * userspace assembly. Prevent endpoint decoders outside of automatic
@@ -543,6 +556,7 @@ struct cxl_region {
 	struct device dev;
 	int id;
 	enum cxl_partition_mode mode;
+	enum cxl_memctrl_mode memctrl;
 	enum cxl_decoder_type type;
 	struct cxl_nvdimm_bridge *cxl_nvb;
 	struct cxl_pmem_region *cxlr_pmem;
-- 
2.52.0
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Jonathan Cameron 3 weeks, 5 days ago
On Mon, 12 Jan 2026 11:35:09 -0500
Gregory Price <gourry@gourry.net> wrote:

> The CXL driver presently hands policy management over to DAX subsystem
> for sysram regions, which makes building policy around the entire region
> clunky and at time difficult (e.g. multiple actions to offline and
> hot-unplug memory reliably).
> 
> To support multiple backend controllers for memory regions (for example
> dax vs direct hotplug), implement a memctrl field in cxl_region allows
> switching uncomitted regions between different "memory controllers".
> 
> CXL_CONTROL_NONE: No selected controller, probe will fail.
> CXL_CONTROL_AUTO: If memory is already online as SysRAM, no controller
>                   otherwise register a dax_region
> CXL_CONTROL_DAX : register a dax_region
> 
> Auto regions will either be static sysram (BIOS-onlined) and has no
> region controller associated with it - or if the SP bit was set a
> DAX device will be created.
> 
> Rather than default all regions to the auto-controller, only default
> auto-regions to the auto controller.
> 
> Non-auto regions will be defaulted to CXL_CONTROL_NONE, which will cause
> a failure to probe unless a controller is selected.
> 
> Signed-off-by: Gregory Price <gourry@gourry.net>
Trivial comments whilst I try to get my head around the series.

...

> diff --git a/drivers/cxl/core/memctrl/Makefile b/drivers/cxl/core/memctrl/Makefile
> new file mode 100644
> index 000000000000..8165aad5a52a
> --- /dev/null
> +++ b/drivers/cxl/core/memctrl/Makefile
> @@ -0,0 +1,4 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +cxl_core-$(CONFIG_CXL_REGION) += memctrl/memctrl.o
> +cxl_core-$(CONFIG_CXL_REGION) += memctrl/dax_region.o
> diff --git a/drivers/cxl/core/memctrl/dax_region.c b/drivers/cxl/core/memctrl/dax_region.c
> new file mode 100644
> index 000000000000..90d7fdb97013
> --- /dev/null
> +++ b/drivers/cxl/core/memctrl/dax_region.c

Can we break the code move out as a precursor with a bit of explanation of
why?  I want to be able to spot functional changes more easily.


> @@ -0,0 +1,79 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <cxlmem.h>
> +#include <cxl.h>
> +#include "../core.h"
> +
> +static struct lock_class_key cxl_dax_region_key;

> +
> +/*
> + * The dax controller is the default controller and simply hands the
> + * control pattern over to the dax driver.  It does with a dax_region
> + * built by dax/cxl.c
> + */
> +int devm_cxl_add_dax_region(struct cxl_region *cxlr)
> +{
> +	struct cxl_dax_region *cxlr_dax;
> +	struct device *dev;
> +	int rc;
> +
> +	cxlr_dax = cxl_dax_region_alloc(cxlr);

Nice to spinkle __free magic on this one though obviously 
unrelated to what you are focused on here!


> +	if (IS_ERR(cxlr_dax))
> +		return PTR_ERR(cxlr_dax);
> +
> +	dev = &cxlr_dax->dev;
> +	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
> +	if (rc)
> +		goto err;
> +
> +	rc = device_add(dev);
> +	if (rc)
> +		goto err;
> +
> +	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
> +		dev_name(dev));
> +
> +	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
> +					cxlr_dax);
> +err:
> +	put_device(dev);
> +	return rc;
> +}
> diff --git a/drivers/cxl/core/memctrl/memctrl.c b/drivers/cxl/core/memctrl/memctrl.c
> new file mode 100644
> index 000000000000..24e0e14b39c7
> --- /dev/null
> +++ b/drivers/cxl/core/memctrl/memctrl.c
...

> +int cxl_enable_memctrl(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +
> +	switch (cxlr->memctrl) {
> +	case CXL_MEMCTRL_AUTO:
> +		/*
> +		 * The region can not be manged by CXL if any portion of
> +		 * it is already online as 'System RAM'
> +		 */
> +		if (walk_iomem_res_desc(IORES_DESC_NONE,
> +					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
> +					p->res->start, p->res->end, cxlr,
> +					is_system_ram) > 0)
> +			return 0;
> +		return devm_cxl_add_dax_region(cxlr);
> +	case CXL_MEMCTRL_DAX:
> +		return devm_cxl_add_dax_region(cxlr);
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
I can't resist pointing out the trivial.  One line sufficient.

> +
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index ae899f68551f..02d7d9ae0252 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -626,6 +626,50 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
>  }
>  static DEVICE_ATTR_RO(mode);
>  
> +static ssize_t ctrl_show(struct device *dev, struct device_attribute *attr,
> +			 char *buf)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	const char *desc;
> +
> +	switch (cxlr->memctrl) {
> +	case CXL_MEMCTRL_AUTO:
> +		desc = "auto";

If any expectation of non trivial number of these, look up in an array rather
than a switch statement. I'll scale better.

> +		break;
> +	case CXL_MEMCTRL_DAX:
> +		desc = "dax";
> +		break;
> +	default:

I'd call this out as NONE simply as then the compiler will point out if you
missed adding code here when a new type is added.

> +		desc = "";
> +		break;
> +	}
> +
> +	return sysfs_emit(buf, "%s\n", desc);
> +}

> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index ba17fa86d249..b8fabaa77262 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -502,6 +502,19 @@ enum cxl_partition_mode {
>  	CXL_PARTMODE_PMEM,
>  };
>  
> +
> +/*
> + * Memory Controller modes:

Give it kernel-doc. 

> + *   None - No controller selected
> + *   Auto - either BIOS-configured as SysRAM, or default to DAX
> + *   DAX  - creates a dax_region controller for the cxl_region
> + */
> +enum cxl_memctrl_mode {
> +	CXL_MEMCTRL_NONE,
> +	CXL_MEMCTRL_AUTO,
> +	CXL_MEMCTRL_DAX,
> +};
> +
>  /*
>   * Indicate whether this region has been assembled by autodetection or
>   * userspace assembly. Prevent endpoint decoders outside of automatic
> @@ -543,6 +556,7 @@ struct cxl_region {
>  	struct device dev;
>  	int id;
>  	enum cxl_partition_mode mode;
> +	enum cxl_memctrl_mode memctrl;
Needs kernel-doc.

>  	enum cxl_decoder_type type;
>  	struct cxl_nvdimm_bridge *cxl_nvb;
>  	struct cxl_pmem_region *cxlr_pmem;
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Gregory Price 3 weeks, 5 days ago
On Wed, Jan 14, 2026 at 05:18:08PM +0000, Jonathan Cameron wrote:
> On Mon, 12 Jan 2026 11:35:09 -0500
> Gregory Price <gourry@gourry.net> wrote:
> 
> > The CXL driver presently hands policy management over to DAX subsystem
> > for sysram regions, which makes building policy around the entire region
> > clunky and at time difficult (e.g. multiple actions to offline and
> > hot-unplug memory reliably).
> > 
> > To support multiple backend controllers for memory regions (for example
> > dax vs direct hotplug), implement a memctrl field in cxl_region allows
> > switching uncomitted regions between different "memory controllers".
> > 
> > CXL_CONTROL_NONE: No selected controller, probe will fail.
> > CXL_CONTROL_AUTO: If memory is already online as SysRAM, no controller
> >                   otherwise register a dax_region
> > CXL_CONTROL_DAX : register a dax_region
> > 
> > Auto regions will either be static sysram (BIOS-onlined) and has no
> > region controller associated with it - or if the SP bit was set a
> > DAX device will be created.
> > 
> > Rather than default all regions to the auto-controller, only default
> > auto-regions to the auto controller.
> > 
> > Non-auto regions will be defaulted to CXL_CONTROL_NONE, which will cause
> > a failure to probe unless a controller is selected.
> > 
> > Signed-off-by: Gregory Price <gourry@gourry.net>
> Trivial comments whilst I try to get my head around the series.
> 
> ...
>

Recommend piviting to v2 which simplifies and addresses some of your
notes here already:

https://lore.kernel.org/linux-cxl/aWfe-r7uEV-ajfhX@gourry-fedora-PF4VCD3F/T/#m791301178876f9b1ab55ed4091c674d4f4ceb07c

Still need to add some docs.

~Gregory
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Jonathan Cameron 3 weeks, 5 days ago
On Wed, 14 Jan 2026 13:25:53 -0500
Gregory Price <gourry@gourry.net> wrote:

> On Wed, Jan 14, 2026 at 05:18:08PM +0000, Jonathan Cameron wrote:
> > On Mon, 12 Jan 2026 11:35:09 -0500
> > Gregory Price <gourry@gourry.net> wrote:
> >   
> > > The CXL driver presently hands policy management over to DAX subsystem
> > > for sysram regions, which makes building policy around the entire region
> > > clunky and at time difficult (e.g. multiple actions to offline and
> > > hot-unplug memory reliably).
> > > 
> > > To support multiple backend controllers for memory regions (for example
> > > dax vs direct hotplug), implement a memctrl field in cxl_region allows
> > > switching uncomitted regions between different "memory controllers".
> > > 
> > > CXL_CONTROL_NONE: No selected controller, probe will fail.
> > > CXL_CONTROL_AUTO: If memory is already online as SysRAM, no controller
> > >                   otherwise register a dax_region
> > > CXL_CONTROL_DAX : register a dax_region
> > > 
> > > Auto regions will either be static sysram (BIOS-onlined) and has no
> > > region controller associated with it - or if the SP bit was set a
> > > DAX device will be created.
> > > 
> > > Rather than default all regions to the auto-controller, only default
> > > auto-regions to the auto controller.
> > > 
> > > Non-auto regions will be defaulted to CXL_CONTROL_NONE, which will cause
> > > a failure to probe unless a controller is selected.
> > > 
> > > Signed-off-by: Gregory Price <gourry@gourry.net>  
> > Trivial comments whilst I try to get my head around the series.
> > 
> > ...
> >  
> 
> Recommend piviting to v2 which simplifies and addresses some of your
> notes here already:
> 
> https://lore.kernel.org/linux-cxl/aWfe-r7uEV-ajfhX@gourry-fedora-PF4VCD3F/T/#m791301178876f9b1ab55ed4091c674d4f4ceb07c
> 
> Still need to add some docs.
That came cover letter free so I didn't figure out it was this
until a few patches in!

J
> 
> ~Gregory
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Cheatham, Benjamin 4 weeks ago
On 1/12/2026 10:35 AM, Gregory Price wrote:
> The CXL driver presently hands policy management over to DAX subsystem
> for sysram regions, which makes building policy around the entire region
> clunky and at time difficult (e.g. multiple actions to offline and
> hot-unplug memory reliably).
> 
> To support multiple backend controllers for memory regions (for example
> dax vs direct hotplug), implement a memctrl field in cxl_region allows
> switching uncomitted regions between different "memory controllers".
> 
> CXL_CONTROL_NONE: No selected controller, probe will fail.
> CXL_CONTROL_AUTO: If memory is already online as SysRAM, no controller
>                   otherwise register a dax_region

I think you can streamline this to get rid of CXL_CONTROL_AUTO. If BIOS set up
the memory you can just set the mode to CXL_CONTROL_NONE, otherwise use CXL_CONTROL_DAX.
This patch would be a bit less complicated at the very least, and I don't think it
would require much reworking of later patches.

> CXL_CONTROL_DAX : register a dax_region
> 
> Auto regions will either be static sysram (BIOS-onlined) and has no
> region controller associated with it - or if the SP bit was set a
> DAX device will be created.
> 
> Rather than default all regions to the auto-controller, only default
> auto-regions to the auto controller.
> 
> Non-auto regions will be defaulted to CXL_CONTROL_NONE, which will cause
> a failure to probe unless a controller is selected.
> 
> Signed-off-by: Gregory Price <gourry@gourry.net>
> ---
>  drivers/cxl/core/Makefile             |   1 +
>  drivers/cxl/core/core.h               |   2 +
>  drivers/cxl/core/memctrl/Makefile     |   4 +
>  drivers/cxl/core/memctrl/dax_region.c |  79 +++++++++++++++
>  drivers/cxl/core/memctrl/memctrl.c    |  42 ++++++++
>  drivers/cxl/core/region.c             | 136 ++++++++++----------------
>  drivers/cxl/cxl.h                     |  14 +++
>  7 files changed, 192 insertions(+), 86 deletions(-)
>  create mode 100644 drivers/cxl/core/memctrl/Makefile
>  create mode 100644 drivers/cxl/core/memctrl/dax_region.c
>  create mode 100644 drivers/cxl/core/memctrl/memctrl.c
> 
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 5ad8fef210b5..79de20e3f8aa 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -17,6 +17,7 @@ cxl_core-y += cdat.o
>  cxl_core-y += ras.o
>  cxl_core-$(CONFIG_TRACING) += trace.o
>  cxl_core-$(CONFIG_CXL_REGION) += region.o
> +include $(src)/memctrl/Makefile
>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 1fb66132b777..1156a4bd0080 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -42,6 +42,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  		   u64 dpa);
> +int cxl_enable_memctrl(struct cxl_region *cxlr);
> +int devm_cxl_add_dax_region(struct cxl_region *cxlr);
>  
>  #else
>  static inline u64 cxl_dpa_to_hpa(struct cxl_region *cxlr,
> diff --git a/drivers/cxl/core/memctrl/Makefile b/drivers/cxl/core/memctrl/Makefile
> new file mode 100644
> index 000000000000..8165aad5a52a
> --- /dev/null
> +++ b/drivers/cxl/core/memctrl/Makefile
> @@ -0,0 +1,4 @@
> +# SPDX-License-Identifier: GPL-2.0
> +
> +cxl_core-$(CONFIG_CXL_REGION) += memctrl/memctrl.o
> +cxl_core-$(CONFIG_CXL_REGION) += memctrl/dax_region.o
> diff --git a/drivers/cxl/core/memctrl/dax_region.c b/drivers/cxl/core/memctrl/dax_region.c
> new file mode 100644
> index 000000000000..90d7fdb97013
> --- /dev/null
> +++ b/drivers/cxl/core/memctrl/dax_region.c
> @@ -0,0 +1,79 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> +#include <linux/device.h>
> +#include <linux/slab.h>
> +#include <cxlmem.h>
> +#include <cxl.h>
> +#include "../core.h"
> +
> +static struct lock_class_key cxl_dax_region_key;
> +
> +static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +	struct cxl_dax_region *cxlr_dax;
> +	struct device *dev;
> +
> +	guard(rwsem_read)(&cxl_rwsem.region);
> +	if (p->state != CXL_CONFIG_COMMIT)
> +		return ERR_PTR(-ENXIO);
> +
> +	cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
> +	if (!cxlr_dax)
> +		return ERR_PTR(-ENOMEM);
> +
> +	cxlr_dax->hpa_range.start = p->res->start;
> +	cxlr_dax->hpa_range.end = p->res->end;
> +
> +	dev = &cxlr_dax->dev;
> +	cxlr_dax->cxlr = cxlr;
> +	device_initialize(dev);
> +	lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
> +	device_set_pm_not_required(dev);
> +	dev->parent = &cxlr->dev;
> +	dev->bus = &cxl_bus_type;
> +	dev->type = &cxl_dax_region_type;
> +
> +	return cxlr_dax;
> +}
> +
> +static void cxlr_dax_unregister(void *_cxlr_dax)
> +{
> +	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
> +
> +	device_unregister(&cxlr_dax->dev);
> +}
> +
> +/*
> + * The dax controller is the default controller and simply hands the
> + * control pattern over to the dax driver.  It does with a dax_region
> + * built by dax/cxl.c
> + */
> +int devm_cxl_add_dax_region(struct cxl_region *cxlr)
> +{
> +	struct cxl_dax_region *cxlr_dax;
> +	struct device *dev;
> +	int rc;
> +
> +	cxlr_dax = cxl_dax_region_alloc(cxlr);
> +	if (IS_ERR(cxlr_dax))
> +		return PTR_ERR(cxlr_dax);
> +
> +	dev = &cxlr_dax->dev;
> +	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
> +	if (rc)
> +		goto err;
> +
> +	rc = device_add(dev);
> +	if (rc)
> +		goto err;
> +
> +	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
> +		dev_name(dev));
> +
> +	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
> +					cxlr_dax);
> +err:
> +	put_device(dev);
> +	return rc;
> +}
> diff --git a/drivers/cxl/core/memctrl/memctrl.c b/drivers/cxl/core/memctrl/memctrl.c
> new file mode 100644
> index 000000000000..24e0e14b39c7
> --- /dev/null
> +++ b/drivers/cxl/core/memctrl/memctrl.c
> @@ -0,0 +1,42 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/* Copyright(c) 2022 Intel Corporation. All rights reserved. */
> +/* Copyright(c) 2026 Meta Inc. All rights reserved. */
> +#include <linux/device.h>
> +#include <linux/ioport.h>
> +#include <cxlmem.h>
> +#include <cxl.h>
> +#include "../core.h"
> +
> +static int is_system_ram(struct resource *res, void *arg)
> +{
> +	struct cxl_region *cxlr = arg;
> +	struct cxl_region_params *p = &cxlr->params;
> +
> +	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
> +	return 1;
> +}
> +
> +int cxl_enable_memctrl(struct cxl_region *cxlr)
> +{
> +	struct cxl_region_params *p = &cxlr->params;
> +
> +	switch (cxlr->memctrl) {
> +	case CXL_MEMCTRL_AUTO:
> +		/*
> +		 * The region can not be manged by CXL if any portion of

s/manged/managed. May as well fix it since it's being moved.
> +		 * it is already online as 'System RAM'
> +		 */
> +		if (walk_iomem_res_desc(IORES_DESC_NONE,
> +					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
> +					p->res->start, p->res->end, cxlr,
> +					is_system_ram) > 0)
> +			return 0;
> +		return devm_cxl_add_dax_region(cxlr);

If you take my suggestion at the top about removing MEMCTRL_AUTO, this case become
the MEMCTRL_DAX case.

> +	case CXL_MEMCTRL_DAX:
> +		return devm_cxl_add_dax_region(cxlr);
> +	default:
> +		return -EINVAL;
> +	}
> +}
> +
> +
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index ae899f68551f..02d7d9ae0252 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -626,6 +626,50 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
>  }
>  static DEVICE_ATTR_RO(mode);
>  
> +static ssize_t ctrl_show(struct device *dev, struct device_attribute *attr,
> +			 char *buf)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	const char *desc;
> +
> +	switch (cxlr->memctrl) {
> +	case CXL_MEMCTRL_AUTO:
> +		desc = "auto";
> +		break;
> +	case CXL_MEMCTRL_DAX:
> +		desc = "dax";
> +		break;
> +	default:
> +		desc = "";

Nit: I would prefer this say "none" instead of being blank to match the code.
> +		break;
> +	}
> +
> +	return sysfs_emit(buf, "%s\n", desc);
> +}
> +
> +static ssize_t ctrl_store(struct device *dev, struct device_attribute *attr,
> +			  const char *buf, size_t len)
> +{
> +	struct cxl_region *cxlr = to_cxl_region(dev);
> +	struct cxl_region_params *p = &cxlr->params;
> +	int rc;
> +
> +	ACQUIRE(rwsem_write_kill, rwsem)(&cxl_rwsem.region);
> +	if ((rc = ACQUIRE_ERR(rwsem_write_kill, &rwsem)))
> +		return rc;
> +
> +	if (p->state >= CXL_CONFIG_COMMIT)
> +		return -EBUSY;
> +
> +	if (sysfs_streq(buf, "dax"))
> +		cxlr->memctrl = CXL_MEMCTRL_DAX;
> +	else
> +		return -EINVAL;
> +
> +	return len;
> +}
> +static DEVICE_ATTR_RW(ctrl);
> +
>  static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
>  {
>  	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
> @@ -772,6 +816,7 @@ static struct attribute *cxl_region_attrs[] = {
>  	&dev_attr_size.attr,
>  	&dev_attr_mode.attr,
>  	&dev_attr_extended_linear_cache_size.attr,
> +	&dev_attr_ctrl.attr,
>  	NULL,
>  };
>  
> @@ -2598,6 +2643,7 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
>  		return cxlr;
>  	cxlr->mode = mode;
>  	cxlr->type = type;
> +	cxlr->memctrl = CXL_MEMCTRL_NONE;
>  
>  	dev = &cxlr->dev;
>  	rc = dev_set_name(dev, "region%d", id);
> @@ -3307,37 +3353,6 @@ struct cxl_dax_region *to_cxl_dax_region(struct device *dev)
>  }
>  EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, "CXL");
>  
> -static struct lock_class_key cxl_dax_region_key;
> -
> -static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr)
> -{
> -	struct cxl_region_params *p = &cxlr->params;
> -	struct cxl_dax_region *cxlr_dax;
> -	struct device *dev;
> -
> -	guard(rwsem_read)(&cxl_rwsem.region);
> -	if (p->state != CXL_CONFIG_COMMIT)
> -		return ERR_PTR(-ENXIO);
> -
> -	cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL);
> -	if (!cxlr_dax)
> -		return ERR_PTR(-ENOMEM);
> -
> -	cxlr_dax->hpa_range.start = p->res->start;
> -	cxlr_dax->hpa_range.end = p->res->end;
> -
> -	dev = &cxlr_dax->dev;
> -	cxlr_dax->cxlr = cxlr;
> -	device_initialize(dev);
> -	lockdep_set_class(&dev->mutex, &cxl_dax_region_key);
> -	device_set_pm_not_required(dev);
> -	dev->parent = &cxlr->dev;
> -	dev->bus = &cxl_bus_type;
> -	dev->type = &cxl_dax_region_type;
> -
> -	return cxlr_dax;
> -}
> -
>  static void cxlr_pmem_unregister(void *_cxlr_pmem)
>  {
>  	struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem;
> @@ -3424,42 +3439,6 @@ static int devm_cxl_add_pmem_region(struct cxl_region *cxlr)
>  	return rc;
>  }
>  
> -static void cxlr_dax_unregister(void *_cxlr_dax)
> -{
> -	struct cxl_dax_region *cxlr_dax = _cxlr_dax;
> -
> -	device_unregister(&cxlr_dax->dev);
> -}
> -
> -static int devm_cxl_add_dax_region(struct cxl_region *cxlr)
> -{
> -	struct cxl_dax_region *cxlr_dax;
> -	struct device *dev;
> -	int rc;
> -
> -	cxlr_dax = cxl_dax_region_alloc(cxlr);
> -	if (IS_ERR(cxlr_dax))
> -		return PTR_ERR(cxlr_dax);
> -
> -	dev = &cxlr_dax->dev;
> -	rc = dev_set_name(dev, "dax_region%d", cxlr->id);
> -	if (rc)
> -		goto err;
> -
> -	rc = device_add(dev);
> -	if (rc)
> -		goto err;
> -
> -	dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent),
> -		dev_name(dev));
> -
> -	return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister,
> -					cxlr_dax);
> -err:
> -	put_device(dev);
> -	return rc;
> -}
> -
>  static int match_decoder_by_range(struct device *dev, const void *data)
>  {
>  	const struct range *r1, *r2 = data;
> @@ -3579,6 +3558,9 @@ static int __construct_region(struct cxl_region *cxlr,
>  
>  	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
>  
> +	/* Auto-regions will either be static sysram (onlined by BIOS) or DAX */
> +	cxlr->memctrl = CXL_MEMCTRL_AUTO;

And this would be set to CXL_MEM_CTRL_DAX instead.

> +
>  	res = kmalloc(sizeof(*res), GFP_KERNEL);
>  	if (!res)
>  		return -ENOMEM;
> @@ -3755,15 +3737,6 @@ u64 cxl_port_get_spa_cache_alias(struct cxl_port *endpoint, u64 spa)
>  }
>  EXPORT_SYMBOL_NS_GPL(cxl_port_get_spa_cache_alias, "CXL");
>  
> -static int is_system_ram(struct resource *res, void *arg)
> -{
> -	struct cxl_region *cxlr = arg;
> -	struct cxl_region_params *p = &cxlr->params;
> -
> -	dev_dbg(&cxlr->dev, "%pr has System RAM: %pr\n", p->res, res);
> -	return 1;
> -}
> -
>  static void shutdown_notifiers(void *_cxlr)
>  {
>  	struct cxl_region *cxlr = _cxlr;
> @@ -3965,16 +3938,7 @@ static int cxl_region_probe(struct device *dev)
>  			dev_dbg(&cxlr->dev, "CXL EDAC registration for region_id=%d failed\n",
>  				cxlr->id);
>  
> -		/*
> -		 * The region can not be manged by CXL if any portion of
> -		 * it is already online as 'System RAM'
> -		 */
> -		if (walk_iomem_res_desc(IORES_DESC_NONE,
> -					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
> -					p->res->start, p->res->end, cxlr,
> -					is_system_ram) > 0)
> -			return 0;
> -		return devm_cxl_add_dax_region(cxlr);
> +		return cxl_enable_memctrl(cxlr);
>  	default:
>  		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
>  			cxlr->mode);
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index ba17fa86d249..b8fabaa77262 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -502,6 +502,19 @@ enum cxl_partition_mode {
>  	CXL_PARTMODE_PMEM,
>  };
>  
> +
> +/*
> + * Memory Controller modes:
> + *   None - No controller selected
> + *   Auto - either BIOS-configured as SysRAM, or default to DAX
> + *   DAX  - creates a dax_region controller for the cxl_region
> + */
> +enum cxl_memctrl_mode {
> +	CXL_MEMCTRL_NONE,
> +	CXL_MEMCTRL_AUTO,
> +	CXL_MEMCTRL_DAX,
> +};
> +
>  /*
>   * Indicate whether this region has been assembled by autodetection or
>   * userspace assembly. Prevent endpoint decoders outside of automatic
> @@ -543,6 +556,7 @@ struct cxl_region {
>  	struct device dev;
>  	int id;
>  	enum cxl_partition_mode mode;
> +	enum cxl_memctrl_mode memctrl;
>  	enum cxl_decoder_type type;
>  	struct cxl_nvdimm_bridge *cxl_nvb;
>  	struct cxl_pmem_region *cxlr_pmem;
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Gregory Price 3 weeks, 6 days ago
On Mon, Jan 12, 2026 at 03:10:29PM -0600, Cheatham, Benjamin wrote:
> On 1/12/2026 10:35 AM, Gregory Price wrote:
> > The CXL driver presently hands policy management over to DAX subsystem
> > for sysram regions, which makes building policy around the entire region
> > clunky and at time difficult (e.g. multiple actions to offline and
> > hot-unplug memory reliably).
> > 
> > To support multiple backend controllers for memory regions (for example
> > dax vs direct hotplug), implement a memctrl field in cxl_region allows
> > switching uncomitted regions between different "memory controllers".
> > 
> > CXL_CONTROL_NONE: No selected controller, probe will fail.
> > CXL_CONTROL_AUTO: If memory is already online as SysRAM, no controller
> >                   otherwise register a dax_region
> 
> I think you can streamline this to get rid of CXL_CONTROL_AUTO. If BIOS set up
> the memory you can just set the mode to CXL_CONTROL_NONE, otherwise use CXL_CONTROL_DAX.
> This patch would be a bit less complicated at the very least, and I don't think it
> would require much reworking of later patches.
>

I suppose if it's an auto-region (CXL_REGION_F_AUTO) we can default to
DAX if the SP bit is set (memory is soft-reserved).

I was hoping to quietly convert our systems from the dax driver to the
sysram driver without too much hubub, but this would require our old
systems to implement a udev or chef thing to online the memory after
boot.  Otherwise I can't ditch the MHP_ auto-online flags to
auto-online.

But yeah I won't fight this too much, just need to think about it.  I
can always ditch it and bring auto back later if there's something that
breaks unexpectedly.

> > +	switch (cxlr->memctrl) {
> > +	case CXL_MEMCTRL_AUTO:
> > +		/*
> > +		 * The region can not be manged by CXL if any portion of
> 
> s/manged/managed. May as well fix it since it's being moved.

ack.

> > +		 * it is already online as 'System RAM'
> > +		 */
> > +		if (walk_iomem_res_desc(IORES_DESC_NONE,
> > +					IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY,
> > +					p->res->start, p->res->end, cxlr,
> > +					is_system_ram) > 0)
> > +			return 0;
> > +		return devm_cxl_add_dax_region(cxlr);
> 
> If you take my suggestion at the top about removing MEMCTRL_AUTO, this case become
> the MEMCTRL_DAX case.
>

ack.

> > +	default:
> > +		desc = "";
> 
> Nit: I would prefer this say "none" instead of being blank to match the code.

reasonable

> > @@ -3579,6 +3558,9 @@ static int __construct_region(struct cxl_region *cxlr,
> >  
> >  	set_bit(CXL_REGION_F_AUTO, &cxlr->flags);
> >  
> > +	/* Auto-regions will either be static sysram (onlined by BIOS) or DAX */
> > +	cxlr->memctrl = CXL_MEMCTRL_AUTO;
> 
> And this would be set to CXL_MEM_CTRL_DAX instead.
> 

Mmmm, hm.  I guess if the region is already online, we'll revert this to
none at probe time, so yeah that should work.

There may be some kind of management we want to add to auto-regions
onlined by BIOS (e.g. no SP bit), but we can add AUTO in if we ever
discover that use-case and argue for it then.

Thanks,
~Gregory
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by dan.j.williams@intel.com 4 weeks ago
Gregory Price wrote:
> The CXL driver presently hands policy management over to DAX subsystem
> for sysram regions, which makes building policy around the entire region
> clunky and at time difficult (e.g. multiple actions to offline and
> hot-unplug memory reliably).
> 
> To support multiple backend controllers for memory regions (for example
> dax vs direct hotplug), implement a memctrl field in cxl_region allows
> switching uncomitted regions between different "memory controllers".
> 
> CXL_CONTROL_NONE: No selected controller, probe will fail.
> CXL_CONTROL_AUTO: If memory is already online as SysRAM, no controller
>                   otherwise register a dax_region
> CXL_CONTROL_DAX : register a dax_region
> 
> Auto regions will either be static sysram (BIOS-onlined) and has no
> region controller associated with it - or if the SP bit was set a
> DAX device will be created.
> 
> Rather than default all regions to the auto-controller, only default
> auto-regions to the auto controller.
> 
> Non-auto regions will be defaulted to CXL_CONTROL_NONE, which will cause
> a failure to probe unless a controller is selected.
> 
> Signed-off-by: Gregory Price <gourry@gourry.net>
> ---
>  drivers/cxl/core/Makefile             |   1 +
>  drivers/cxl/core/core.h               |   2 +
>  drivers/cxl/core/memctrl/Makefile     |   4 +
>  drivers/cxl/core/memctrl/dax_region.c |  79 +++++++++++++++
>  drivers/cxl/core/memctrl/memctrl.c    |  42 ++++++++
>  drivers/cxl/core/region.c             | 136 ++++++++++----------------
>  drivers/cxl/cxl.h                     |  14 +++
>  7 files changed, 192 insertions(+), 86 deletions(-)
>  create mode 100644 drivers/cxl/core/memctrl/Makefile
>  create mode 100644 drivers/cxl/core/memctrl/dax_region.c
>  create mode 100644 drivers/cxl/core/memctrl/memctrl.c
> 
> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
> index 5ad8fef210b5..79de20e3f8aa 100644
> --- a/drivers/cxl/core/Makefile
> +++ b/drivers/cxl/core/Makefile
> @@ -17,6 +17,7 @@ cxl_core-y += cdat.o
>  cxl_core-y += ras.o
>  cxl_core-$(CONFIG_TRACING) += trace.o
>  cxl_core-$(CONFIG_CXL_REGION) += region.o
> +include $(src)/memctrl/Makefile

Not sure this merits its own directory, but if it does just do the
canonical:

obj-y += memctrl/

...to add an object-sub-directory.
 
>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
> index 1fb66132b777..1156a4bd0080 100644
> --- a/drivers/cxl/core/core.h
> +++ b/drivers/cxl/core/core.h
> @@ -42,6 +42,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>  		   u64 dpa);
> +int cxl_enable_memctrl(struct cxl_region *cxlr);

This is a "probe" operation not an "enable" in terms of runtime ABI and
presentation that starts decorating the region. In that respect it also
is not a "control" as much as an "operation model / driver". So no need
for a "control" concept, i.e.:

s/CXL_CONTROL_{NONE,AUTO,DAX}/CXL_DRIVER_{NONE,AUTO,DAX}/
s/enum cxl_memctrl_mode/enum cxl_region_driver/

...otherwise there is nothing in this proposal that makes me want to
abandon the traditional meaning of a "driver" probing a "resource" in a
certain way to make it usable with the rest of the kernel.

Rest of this looks fine. With that fixup if we are going to have a set
of different region driver modes then the directory can be:

drivers/cxl/core/region/
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Dave Jiang 3 weeks, 6 days ago

On 1/12/26 1:59 PM, dan.j.williams@intel.com wrote:
> Gregory Price wrote:
>> The CXL driver presently hands policy management over to DAX subsystem
>> for sysram regions, which makes building policy around the entire region
>> clunky and at time difficult (e.g. multiple actions to offline and
>> hot-unplug memory reliably).
>>
>> To support multiple backend controllers for memory regions (for example
>> dax vs direct hotplug), implement a memctrl field in cxl_region allows
>> switching uncomitted regions between different "memory controllers".
>>
>> CXL_CONTROL_NONE: No selected controller, probe will fail.
>> CXL_CONTROL_AUTO: If memory is already online as SysRAM, no controller
>>                   otherwise register a dax_region
>> CXL_CONTROL_DAX : register a dax_region
>>
>> Auto regions will either be static sysram (BIOS-onlined) and has no
>> region controller associated with it - or if the SP bit was set a
>> DAX device will be created.
>>
>> Rather than default all regions to the auto-controller, only default
>> auto-regions to the auto controller.
>>
>> Non-auto regions will be defaulted to CXL_CONTROL_NONE, which will cause
>> a failure to probe unless a controller is selected.
>>
>> Signed-off-by: Gregory Price <gourry@gourry.net>
>> ---
>>  drivers/cxl/core/Makefile             |   1 +
>>  drivers/cxl/core/core.h               |   2 +
>>  drivers/cxl/core/memctrl/Makefile     |   4 +
>>  drivers/cxl/core/memctrl/dax_region.c |  79 +++++++++++++++
>>  drivers/cxl/core/memctrl/memctrl.c    |  42 ++++++++
>>  drivers/cxl/core/region.c             | 136 ++++++++++----------------
>>  drivers/cxl/cxl.h                     |  14 +++
>>  7 files changed, 192 insertions(+), 86 deletions(-)
>>  create mode 100644 drivers/cxl/core/memctrl/Makefile
>>  create mode 100644 drivers/cxl/core/memctrl/dax_region.c
>>  create mode 100644 drivers/cxl/core/memctrl/memctrl.c
>>
>> diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
>> index 5ad8fef210b5..79de20e3f8aa 100644
>> --- a/drivers/cxl/core/Makefile
>> +++ b/drivers/cxl/core/Makefile
>> @@ -17,6 +17,7 @@ cxl_core-y += cdat.o
>>  cxl_core-y += ras.o
>>  cxl_core-$(CONFIG_TRACING) += trace.o
>>  cxl_core-$(CONFIG_CXL_REGION) += region.o
>> +include $(src)/memctrl/Makefile
> 
> Not sure this merits its own directory, but if it does just do the
> canonical:
> 
> obj-y += memctrl/
> 
> ...to add an object-sub-directory.
>  
>>  cxl_core-$(CONFIG_CXL_MCE) += mce.o
>>  cxl_core-$(CONFIG_CXL_FEATURES) += features.o
>>  cxl_core-$(CONFIG_CXL_EDAC_MEM_FEATURES) += edac.o
>> diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
>> index 1fb66132b777..1156a4bd0080 100644
>> --- a/drivers/cxl/core/core.h
>> +++ b/drivers/cxl/core/core.h
>> @@ -42,6 +42,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
>>  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
>>  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
>>  		   u64 dpa);
>> +int cxl_enable_memctrl(struct cxl_region *cxlr);
> 
> This is a "probe" operation not an "enable" in terms of runtime ABI and
> presentation that starts decorating the region. In that respect it also
> is not a "control" as much as an "operation model / driver". So no need
> for a "control" concept, i.e.:
> 
> s/CXL_CONTROL_{NONE,AUTO,DAX}/CXL_DRIVER_{NONE,AUTO,DAX}/
> s/enum cxl_memctrl_mode/enum cxl_region_driver/
> 
> ...otherwise there is nothing in this proposal that makes me want to
> abandon the traditional meaning of a "driver" probing a "resource" in a
> certain way to make it usable with the rest of the kernel.
> 
> Rest of this looks fine. With that fixup if we are going to have a set
> of different region driver modes then the directory can be:
> 
> drivers/cxl/core/region/

Do we still have reasons to keep the region drivers in core? I know Fabio has been looking at moving the region drivers to drivers/cxl/ so the LMH cxl_test stuff doesn't doesn't need to do all the weird stuff to make it work. Maybe we just do the refactor now and move the region drivers outside of core. How about drivers/cxl/region/?
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by dan.j.williams@intel.com 3 weeks, 5 days ago
Dave Jiang wrote:
[..]
> > Rest of this looks fine. With that fixup if we are going to have a set
> > of different region driver modes then the directory can be:
> > 
> > drivers/cxl/core/region/
> 
> Do we still have reasons to keep the region drivers in core? I know
> Fabio has been looking at moving the region drivers to drivers/cxl/ so
> the LMH cxl_test stuff doesn't doesn't need to do all the weird stuff
> to make it work. Maybe we just do the refactor now and move the region
> drivers outside of core. How about drivers/cxl/region/?

Not opposed to making it a module, just need to make sure that all
potential region drivers are loaded prior to the first
cxl_add_to_region() call. Otherwise, this breaks the expectation that
auto-assembly of regions present at boot can be flushed by
wait_for_device_probe(). Specifically, userspace module loading requests
are not flushed by this helper, only direct symbol dependencies.

It would be lovely if we had a unit test that specifically checked for
regressions like this, because the race can be difficult to see.
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Gregory Price 3 weeks, 6 days ago
On Tue, Jan 13, 2026 at 11:00:42AM -0700, Dave Jiang wrote:
> 
> > 
> > This is a "probe" operation not an "enable" in terms of runtime ABI and
> > presentation that starts decorating the region. In that respect it also
> > is not a "control" as much as an "operation model / driver". So no need
> > for a "control" concept, i.e.:
> > 
> > s/CXL_CONTROL_{NONE,AUTO,DAX}/CXL_DRIVER_{NONE,AUTO,DAX}/
> > s/enum cxl_memctrl_mode/enum cxl_region_driver/
> > 
> > ...otherwise there is nothing in this proposal that makes me want to
> > abandon the traditional meaning of a "driver" probing a "resource" in a
> > certain way to make it usable with the rest of the kernel.
> > 
> > Rest of this looks fine. With that fixup if we are going to have a set
> > of different region driver modes then the directory can be:
> > 
> > drivers/cxl/core/region/
> 
> Do we still have reasons to keep the region drivers in core? I know Fabio has been looking at moving the region drivers to drivers/cxl/ so the LMH cxl_test stuff doesn't doesn't need to do all the weird stuff to make it work. Maybe we just do the refactor now and move the region drivers outside of core. How about drivers/cxl/region/?
> 

I pulled them out into drivers/cxl/core for now, but this is a trivial
move.  I think it may still use some core.h functions, so it would be
more work to pull them all the way out into cxl.

will submit a v2 here shortly with just the refactor work w/ the new
sysfs entry (but no sysram driver, going to defer that to a separate
line on top of this).

~Gregory
Re: [PATCH 1/6] drivers/cxl: add cxl_memctrl_mode and region->memctrl
Posted by Gregory Price 3 weeks, 6 days ago
On Mon, Jan 12, 2026 at 12:59:44PM -0800, dan.j.williams@intel.com wrote:
> Gregory Price wrote:
> > --- a/drivers/cxl/core/core.h
> > +++ b/drivers/cxl/core/core.h
> > @@ -42,6 +42,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port);
> >  struct cxl_region *cxl_dpa_to_region(const struct cxl_memdev *cxlmd, u64 dpa);
> >  u64 cxl_dpa_to_hpa(struct cxl_region *cxlr, const struct cxl_memdev *cxlmd,
> >  		   u64 dpa);
> > +int cxl_enable_memctrl(struct cxl_region *cxlr);
> 
> This is a "probe" operation not an "enable" in terms of runtime ABI and
> presentation that starts decorating the region. In that respect it also
> is not a "control" as much as an "operation model / driver". So no need
> for a "control" concept, i.e.:
> 
> s/CXL_CONTROL_{NONE,AUTO,DAX}/CXL_DRIVER_{NONE,AUTO,DAX}/
> s/enum cxl_memctrl_mode/enum cxl_region_driver/
> 

Ack.

> ...otherwise there is nothing in this proposal that makes me want to
> abandon the traditional meaning of a "driver" probing a "resource" in a
> certain way to make it usable with the rest of the kernel.
> 
> Rest of this looks fine. With that fixup if we are going to have a set
> of different region driver modes then the directory can be:
> 
> drivers/cxl/core/region/

Mostly I imagine we'll end up with at least 5-6 of these, and the
directory makes it clear for any new comers "This is what you must do if
you want a new region".

but if folks would rather have it directly in core/ then `git mv...` 

either way, ack.

~Gregory