Stop creating cxl_dax during cxl_region_probe(). Early DAX registration
can online memory before ownership of Soft Reserved ranges is finalized.
This makes it difficult to tear down regions later when HMEM determines
that a region should not claim that range.
Introduce a register_dax flag in struct cxl_region_params and gate DAX
registration on this flag. Leave probe time registration disabled for
regions discovered during early CXL enumeration; set the flag only for
regions created dynamically at runtime to preserve existing behaviour.
This patch prepares the region code for later changes where cxl_dax
setup occurs from the HMEM path only after ownership arbitration
completes.
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
drivers/cxl/core/region.c | 21 ++++++++++++++++-----
drivers/cxl/cxl.h | 1 +
2 files changed, 17 insertions(+), 5 deletions(-)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 94dbbd6b5513..c17cd8706b9d 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2540,9 +2540,11 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
int id,
enum cxl_partition_mode mode,
- enum cxl_decoder_type type)
+ enum cxl_decoder_type type,
+ bool register_dax)
{
struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
+ struct cxl_region_params *p;
struct cxl_region *cxlr;
struct device *dev;
int rc;
@@ -2553,6 +2555,9 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
cxlr->mode = mode;
cxlr->type = type;
+ p = &cxlr->params;
+ p->register_dax = register_dax;
+
dev = &cxlr->dev;
rc = dev_set_name(dev, "region%d", id);
if (rc)
@@ -2593,7 +2598,8 @@ static ssize_t create_ram_region_show(struct device *dev,
}
static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
- enum cxl_partition_mode mode, int id)
+ enum cxl_partition_mode mode, int id,
+ bool register_dax)
{
int rc;
@@ -2615,7 +2621,8 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
return ERR_PTR(-EBUSY);
}
- return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
+ return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM,
+ register_dax);
}
static ssize_t create_region_store(struct device *dev, const char *buf,
@@ -2629,7 +2636,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf,
if (rc != 1)
return -EINVAL;
- cxlr = __create_region(cxlrd, mode, id);
+ cxlr = __create_region(cxlrd, mode, id, true);
if (IS_ERR(cxlr))
return PTR_ERR(cxlr);
@@ -3523,7 +3530,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
do {
cxlr = __create_region(cxlrd, cxlds->part[part].mode,
- atomic_read(&cxlrd->region_id));
+ atomic_read(&cxlrd->region_id), false);
} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
if (IS_ERR(cxlr)) {
@@ -3930,6 +3937,10 @@ static int cxl_region_probe(struct device *dev)
p->res->start, p->res->end, cxlr,
is_system_ram) > 0)
return 0;
+
+ if (!p->register_dax)
+ return 0;
+
return devm_cxl_add_dax_region(cxlr);
default:
dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index af78c9fd37f2..324220596890 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -495,6 +495,7 @@ struct cxl_region_params {
struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE];
int nr_targets;
resource_size_t cache_size;
+ bool register_dax;
};
enum cxl_partition_mode {
--
2.17.1
Smita Koralahalli wrote:
> Stop creating cxl_dax during cxl_region_probe(). Early DAX registration
> can online memory before ownership of Soft Reserved ranges is finalized.
> This makes it difficult to tear down regions later when HMEM determines
> that a region should not claim that range.
>
> Introduce a register_dax flag in struct cxl_region_params and gate DAX
> registration on this flag. Leave probe time registration disabled for
> regions discovered during early CXL enumeration; set the flag only for
> regions created dynamically at runtime to preserve existing behaviour.
>
> This patch prepares the region code for later changes where cxl_dax
> setup occurs from the HMEM path only after ownership arbitration
> completes.

This seems backwards to me. The dax subsystem knows when it wants to
move ahead with CXL or not, dax_cxl_mode is that indicator. So, just
share that variable with drivers/dax/cxl.c, arrange for
cxl_dax_region_probe() to fail while waiting for initial CXL probing to
succeed.

Once that point is reached move dax_cxl_mode to DAX_CXL_MODE_DROP, which
means drop the hmem alias, and go with the real-deal CXL region. Rescan
the dax-bus to retry cxl_dax_region_probe(). No need to bother 'struct
cxl_region' with a 'dax' flag, it just registers per normal and lets the
dax-subsystem handle accepting / rejecting.

Now, we do need a mechanism from dax-to-cxl to trigger region removal in
the DAX_CXL_MODE_REGISTER case (proceed with the hmem registration), but
that is separate from blocking the attachment of dax to CXL regions.
Keep all that complexity local to dax.
On 12/3/2025 4:22 PM, dan.j.williams@intel.com wrote:
> Smita Koralahalli wrote:
>> Stop creating cxl_dax during cxl_region_probe(). Early DAX registration
>> can online memory before ownership of Soft Reserved ranges is finalized.
>> This makes it difficult to tear down regions later when HMEM determines
>> that a region should not claim that range.
>>
>> Introduce a register_dax flag in struct cxl_region_params and gate DAX
>> registration on this flag. Leave probe time registration disabled for
>> regions discovered during early CXL enumeration; set the flag only for
>> regions created dynamically at runtime to preserve existing behaviour.
>>
>> This patch prepares the region code for later changes where cxl_dax
>> setup occurs from the HMEM path only after ownership arbitration
>> completes.
>
> This seems backwards to me. The dax subsystem knows when it wants to
> move ahead with CXL or not, dax_cxl_mode is that indicator. So, just
> share that variable with drivers/dax/cxl.c, arrange for
> cxl_dax_region_probe() to fail while waiting for initial CXL probing to
> succeed.
>
> Once that point is reached move dax_cxl_mode to DAX_CXL_MODE_DROP, which
> means drop the hmem alias, and go with the real-deal CXL region. Rescan
> the dax-bus to retry cxl_dax_region_probe(). No need to bother 'struct
> cxl_region' with a 'dax' flag, it just registers per normal and lets the
> dax-subsystem handle accepting / rejecting.
>
> Now, we do need a mechanism from dax-to-cxl to trigger region removal in
> the DAX_CXL_MODE_REGISTER case (proceed with the hmem registration), but
> that is separate from blocking the attachment of dax to CXL regions.
> Keep all that complexity local to dax.
Okay. To make sure I'm aligned with your suggestion, it should look
something like the following in cxl_dax_region_probe():
switch (dax_cxl_mode) {
case DAX_CXL_MODE_DEFER:
return -EPROBE_DEFER;
case DAX_CXL_MODE_REGISTER:
return -ENODEV;
case DAX_CXL_MODE_DROP:
default:
break;
}
Then in the HMEM path, if the Soft Reserved (SR) span is fully covered by
CXL regions, I will switch to DAX_CXL_MODE_DROP and trigger a rescan.
Something like:
if (cxl_regions_fully_map(res->start, res->end)) {
dax_cxl_mode = DAX_CXL_MODE_DROP;
bus_rescan_devices(&cxl_bus_type);
} else {
dax_cxl_mode = DAX_CXL_MODE_REGISTER;
cxl_region_teardown(res->start, res->end);
}
hmem_register_device(host, target_nid, res);
cxl_regions_fully_map() will include changes as suggested in Patch 5.
Thanks
Smita
Hi Smita,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 211ddde0823f1442e4ad052a2f30f050145ccada]

url: https://github.com/intel-lab-lkp/linux/commits/Smita-Koralahalli/dax-hmem-e820-resource-Defer-Soft-Reserved-insertion-until-hmem-is-ready/20251120-112457
base: 211ddde0823f1442e4ad052a2f30f050145ccada
patch link: https://lore.kernel.org/r/20251120031925.87762-7-Smita.KoralahalliChannabasappa%40amd.com
patch subject: [PATCH v4 6/9] cxl/region: Add register_dax flag to defer DAX setup
config: sparc64-randconfig-6002-20251120 (https://download.01.org/0day-ci/archive/20251121/202511210343.c0vb4NRc-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 13.4.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251121/202511210343.c0vb4NRc-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511210343.c0vb4NRc-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> Warning: drivers/cxl/core/region.c:2544 function parameter 'register_dax' not described in 'devm_cxl_add_region'

--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On 11/19/2025 7:19 PM, Smita Koralahalli wrote:
> Stop creating cxl_dax during cxl_region_probe(). Early DAX registration
> can online memory before ownership of Soft Reserved ranges is finalized.
> This makes it difficult to tear down regions later when HMEM determines
> that a region should not claim that range.
>
> Introduce a register_dax flag in struct cxl_region_params and gate DAX
> registration on this flag. Leave probe time registration disabled for
> regions discovered during early CXL enumeration; set the flag only for
> regions created dynamically at runtime to preserve existing behaviour.
>
> This patch prepares the region code for later changes where cxl_dax
> setup occurs from the HMEM path only after ownership arbitration
> completes.
>
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> ---
> drivers/cxl/core/region.c | 21 ++++++++++++++++-----
> drivers/cxl/cxl.h | 1 +
> 2 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 94dbbd6b5513..c17cd8706b9d 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2540,9 +2540,11 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
> static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
> int id,
> enum cxl_partition_mode mode,
> - enum cxl_decoder_type type)
> + enum cxl_decoder_type type,
> + bool register_dax)
> {
> struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
> + struct cxl_region_params *p;
> struct cxl_region *cxlr;
> struct device *dev;
> int rc;
> @@ -2553,6 +2555,9 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
> cxlr->mode = mode;
> cxlr->type = type;
>
> + p = &cxlr->params;
> + p->register_dax = register_dax;
> +
> dev = &cxlr->dev;
> rc = dev_set_name(dev, "region%d", id);
> if (rc)
> @@ -2593,7 +2598,8 @@ static ssize_t create_ram_region_show(struct device *dev,
> }
>
> static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
> - enum cxl_partition_mode mode, int id)
> + enum cxl_partition_mode mode, int id,
> + bool register_dax)
> {
> int rc;
>
> @@ -2615,7 +2621,8 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
> return ERR_PTR(-EBUSY);
> }
>
> - return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
> + return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM,
> + register_dax);
> }
>
> static ssize_t create_region_store(struct device *dev, const char *buf,
> @@ -2629,7 +2636,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf,
> if (rc != 1)
> return -EINVAL;
>
> - cxlr = __create_region(cxlrd, mode, id);
> + cxlr = __create_region(cxlrd, mode, id, true);
> if (IS_ERR(cxlr))
> return PTR_ERR(cxlr);
>
> @@ -3523,7 +3530,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>
> do {
> cxlr = __create_region(cxlrd, cxlds->part[part].mode,
> - atomic_read(&cxlrd->region_id));
> + atomic_read(&cxlrd->region_id), false);
> } while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>
> if (IS_ERR(cxlr)) {
> @@ -3930,6 +3937,10 @@ static int cxl_region_probe(struct device *dev)
> p->res->start, p->res->end, cxlr,
> is_system_ram) > 0)
> return 0;
> +
> + if (!p->register_dax)
> + return 0;
Sorry, I missed this. It should continue registering DAX if HMEM is
disabled. I will fix this in v5 and add a comment here:
- if (!p->register_dax)
- return 0;
+ /*
+ * Only skip probe time DAX if HMEM will handle it
+ * later.
+ */
+ if (IS_ENABLED(CONFIG_DEV_DAX_HMEM) && !p->register_dax)
+ return 0;
> +
> return devm_cxl_add_dax_region(cxlr);
> default:
> dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index af78c9fd37f2..324220596890 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -495,6 +495,7 @@ struct cxl_region_params {
> struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE];
> int nr_targets;
> resource_size_t cache_size;
> + bool register_dax;
> };
>
> enum cxl_partition_mode {
© 2016 - 2026 Red Hat, Inc.