[PATCH v4 6/9] cxl/region: Add register_dax flag to defer DAX setup

Smita Koralahalli posted 9 patches 2 months, 3 weeks ago
There is a newer version of this series
[PATCH v4 6/9] cxl/region: Add register_dax flag to defer DAX setup
Posted by Smita Koralahalli 2 months, 3 weeks ago
Stop creating cxl_dax during cxl_region_probe(). Early DAX registration
can online memory before ownership of Soft Reserved ranges is finalized.
This makes it difficult to tear down regions later when HMEM determines
that a region should not claim that range.

Introduce a register_dax flag in struct cxl_region_params and gate DAX
registration on this flag. Leave probe time registration disabled for
regions discovered during early CXL enumeration; set the flag only for
regions created dynamically at runtime to preserve existing behaviour.

This patch prepares the region code for later changes where cxl_dax
setup occurs from the HMEM path only after ownership arbitration
completes.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
---
 drivers/cxl/core/region.c | 21 ++++++++++++++++-----
 drivers/cxl/cxl.h         |  1 +
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 94dbbd6b5513..c17cd8706b9d 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -2540,9 +2540,11 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
 static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 					      int id,
 					      enum cxl_partition_mode mode,
-					      enum cxl_decoder_type type)
+					      enum cxl_decoder_type type,
+					      bool register_dax)
 {
 	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
+	struct cxl_region_params *p;
 	struct cxl_region *cxlr;
 	struct device *dev;
 	int rc;
@@ -2553,6 +2555,9 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 	cxlr->mode = mode;
 	cxlr->type = type;
 
+	p = &cxlr->params;
+	p->register_dax = register_dax;
+
 	dev = &cxlr->dev;
 	rc = dev_set_name(dev, "region%d", id);
 	if (rc)
@@ -2593,7 +2598,8 @@ static ssize_t create_ram_region_show(struct device *dev,
 }
 
 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
-					  enum cxl_partition_mode mode, int id)
+					  enum cxl_partition_mode mode, int id,
+					  bool register_dax)
 {
 	int rc;
 
@@ -2615,7 +2621,8 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
 		return ERR_PTR(-EBUSY);
 	}
 
-	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
+	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM,
+				   register_dax);
 }
 
 static ssize_t create_region_store(struct device *dev, const char *buf,
@@ -2629,7 +2636,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf,
 	if (rc != 1)
 		return -EINVAL;
 
-	cxlr = __create_region(cxlrd, mode, id);
+	cxlr = __create_region(cxlrd, mode, id, true);
 	if (IS_ERR(cxlr))
 		return PTR_ERR(cxlr);
 
@@ -3523,7 +3530,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 
 	do {
 		cxlr = __create_region(cxlrd, cxlds->part[part].mode,
-				       atomic_read(&cxlrd->region_id));
+				       atomic_read(&cxlrd->region_id), false);
 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
 
 	if (IS_ERR(cxlr)) {
@@ -3930,6 +3937,10 @@ static int cxl_region_probe(struct device *dev)
 					p->res->start, p->res->end, cxlr,
 					is_system_ram) > 0)
 			return 0;
+
+		if (!p->register_dax)
+			return 0;
+
 		return devm_cxl_add_dax_region(cxlr);
 	default:
 		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index af78c9fd37f2..324220596890 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -495,6 +495,7 @@ struct cxl_region_params {
 	struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE];
 	int nr_targets;
 	resource_size_t cache_size;
+	bool register_dax;
 };
 
 enum cxl_partition_mode {
-- 
2.17.1
Re: [PATCH v4 6/9] cxl/region: Add register_dax flag to defer DAX setup
Posted by dan.j.williams@intel.com 2 months, 1 week ago
Smita Koralahalli wrote:
> Stop creating cxl_dax during cxl_region_probe(). Early DAX registration
> can online memory before ownership of Soft Reserved ranges is finalized.
> This makes it difficult to tear down regions later when HMEM determines
> that a region should not claim that range.
> 
> Introduce a register_dax flag in struct cxl_region_params and gate DAX
> registration on this flag. Leave probe time registration disabled for
> regions discovered during early CXL enumeration; set the flag only for
> regions created dynamically at runtime to preserve existing behaviour.
> 
> This patch prepares the region code for later changes where cxl_dax
> setup occurs from the HMEM path only after ownership arbitration
> completes.

This seems backwards to me. The dax subsystem knows when it wants to
move ahead with CXL or not, dax_cxl_mode is that indicator. So, just
share that variable with drivers/dax/cxl.c, arrange for
cxl_dax_region_probe() to fail while waiting for initial CXL probing to
succeed.

Once that point is reached move dax_cxl_mode to DAX_CXL_MODE_DROP, which
means drop the hmem alias, and go with the real-deal CXL region. Rescan
the dax-bus to retry cxl_dax_region_probe(). No need to bother 'struct
cxl_region' with a 'dax' flag, it just registers per normal and lets the
dax-subsystem handle accepting / rejecting.

Now, we do need a mechanism from dax-to-cxl to trigger region removal in
the DAX_CXL_MODE_REGISTER case (proceed with the hmem registration), but
that is separate from blocking the attachment of dax to CXL regions.
Keep all that complexity local to dax.
Re: [PATCH v4 6/9] cxl/region: Add register_dax flag to defer DAX setup
Posted by Koralahalli Channabasappa, Smita 1 month, 4 weeks ago
On 12/3/2025 4:22 PM, dan.j.williams@intel.com wrote:
> Smita Koralahalli wrote:
>> Stop creating cxl_dax during cxl_region_probe(). Early DAX registration
>> can online memory before ownership of Soft Reserved ranges is finalized.
>> This makes it difficult to tear down regions later when HMEM determines
>> that a region should not claim that range.
>>
>> Introduce a register_dax flag in struct cxl_region_params and gate DAX
>> registration on this flag. Leave probe time registration disabled for
>> regions discovered during early CXL enumeration; set the flag only for
>> regions created dynamically at runtime to preserve existing behaviour.
>>
>> This patch prepares the region code for later changes where cxl_dax
>> setup occurs from the HMEM path only after ownership arbitration
>> completes.
> 
> This seems backwards to me. The dax subsystem knows when it wants to
> move ahead with CXL or not, dax_cxl_mode is that indicator. So, just
> share that variable with drivers/dax/cxl.c, arrange for
> cxl_dax_region_probe() to fail while waiting for initial CXL probing to
> succeed.
> 
> Once that point is reached move dax_cxl_mode to DAX_CXL_MODE_DROP, which
> means drop the hmem alias, and go with the real-deal CXL region. Rescan
> the dax-bus to retry cxl_dax_region_probe(). No need to bother 'struct
> cxl_region' with a 'dax' flag, it just registers per normal and lets the
> dax-subsystem handle accepting / rejecting.
> 
> Now, we do need a mechanism from dax-to-cxl to trigger region removal in
> the DAX_CXL_MODE_REGISTER case (proceed with the hmem registration), but
> that is separate from blocking the attachment of dax to CXL regions.
> Keep all that complexity local to dax.

Okay. Let me make sure I'm aligned with your suggestion.

It should be something like below in cxl_dax_region_probe():

switch (dax_cxl_mode) {
case DAX_CXL_MODE_DEFER:
	return -EPROBE_DEFER;
case DAX_CXL_MODE_REGISTER:
	return -ENODEV;
case DAX_CXL_MODE_DROP:
default:
	break;
}

Then in the HMEM path, if the Soft Reserved (SR) span is fully covered,
I will switch to DAX_CXL_MODE_DROP and trigger a rescan.

Something like:

if (cxl_regions_fully_map(res->start, res->end)) {
	dax_cxl_mode = DAX_CXL_MODE_DROP;
	bus_rescan_devices(&cxl_bus_type);
} else {
	dax_cxl_mode = DAX_CXL_MODE_REGISTER;
	cxl_region_teardown(res->start, res->end);
}

hmem_register_device(host, target_nid, res);

cxl_regions_fully_map() will include changes as suggested in Patch 5.

Thanks
Smita
Re: [PATCH v4 6/9] cxl/region: Add register_dax flag to defer DAX setup
Posted by kernel test robot 2 months, 2 weeks ago
Hi Smita,

kernel test robot noticed the following build warnings:

[auto build test WARNING on 211ddde0823f1442e4ad052a2f30f050145ccada]

url:    https://github.com/intel-lab-lkp/linux/commits/Smita-Koralahalli/dax-hmem-e820-resource-Defer-Soft-Reserved-insertion-until-hmem-is-ready/20251120-112457
base:   211ddde0823f1442e4ad052a2f30f050145ccada
patch link:    https://lore.kernel.org/r/20251120031925.87762-7-Smita.KoralahalliChannabasappa%40amd.com
patch subject: [PATCH v4 6/9] cxl/region: Add register_dax flag to defer DAX setup
config: sparc64-randconfig-6002-20251120 (https://download.01.org/0day-ci/archive/20251121/202511210343.c0vb4NRc-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 13.4.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20251121/202511210343.c0vb4NRc-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add the following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202511210343.c0vb4NRc-lkp@intel.com/

All warnings (new ones prefixed by >>):

>> Warning: drivers/cxl/core/region.c:2544 function parameter 'register_dax' not described in 'devm_cxl_add_region'

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v4 6/9] cxl/region: Add register_dax flag to defer DAX setup
Posted by Koralahalli Channabasappa, Smita 2 months, 2 weeks ago
On 11/19/2025 7:19 PM, Smita Koralahalli wrote:
> Stop creating cxl_dax during cxl_region_probe(). Early DAX registration
> can online memory before ownership of Soft Reserved ranges is finalized.
> This makes it difficult to tear down regions later when HMEM determines
> that a region should not claim that range.
> 
> Introduce a register_dax flag in struct cxl_region_params and gate DAX
> registration on this flag. Leave probe time registration disabled for
> regions discovered during early CXL enumeration; set the flag only for
> regions created dynamically at runtime to preserve existing behaviour.
> 
> This patch prepares the region code for later changes where cxl_dax
> setup occurs from the HMEM path only after ownership arbitration
> completes.
> 
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> ---
>   drivers/cxl/core/region.c | 21 ++++++++++++++++-----
>   drivers/cxl/cxl.h         |  1 +
>   2 files changed, 17 insertions(+), 5 deletions(-)
> 
> diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
> index 94dbbd6b5513..c17cd8706b9d 100644
> --- a/drivers/cxl/core/region.c
> +++ b/drivers/cxl/core/region.c
> @@ -2540,9 +2540,11 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
>   static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
>   					      int id,
>   					      enum cxl_partition_mode mode,
> -					      enum cxl_decoder_type type)
> +					      enum cxl_decoder_type type,
> +					      bool register_dax)
>   {
>   	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
> +	struct cxl_region_params *p;
>   	struct cxl_region *cxlr;
>   	struct device *dev;
>   	int rc;
> @@ -2553,6 +2555,9 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
>   	cxlr->mode = mode;
>   	cxlr->type = type;
>   
> +	p = &cxlr->params;
> +	p->register_dax = register_dax;
> +
>   	dev = &cxlr->dev;
>   	rc = dev_set_name(dev, "region%d", id);
>   	if (rc)
> @@ -2593,7 +2598,8 @@ static ssize_t create_ram_region_show(struct device *dev,
>   }
>   
>   static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
> -					  enum cxl_partition_mode mode, int id)
> +					  enum cxl_partition_mode mode, int id,
> +					  bool register_dax)
>   {
>   	int rc;
>   
> @@ -2615,7 +2621,8 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
>   		return ERR_PTR(-EBUSY);
>   	}
>   
> -	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM);
> +	return devm_cxl_add_region(cxlrd, id, mode, CXL_DECODER_HOSTONLYMEM,
> +				   register_dax);
>   }
>   
>   static ssize_t create_region_store(struct device *dev, const char *buf,
> @@ -2629,7 +2636,7 @@ static ssize_t create_region_store(struct device *dev, const char *buf,
>   	if (rc != 1)
>   		return -EINVAL;
>   
> -	cxlr = __create_region(cxlrd, mode, id);
> +	cxlr = __create_region(cxlrd, mode, id, true);
>   	if (IS_ERR(cxlr))
>   		return PTR_ERR(cxlr);
>   
> @@ -3523,7 +3530,7 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
>   
>   	do {
>   		cxlr = __create_region(cxlrd, cxlds->part[part].mode,
> -				       atomic_read(&cxlrd->region_id));
> +				       atomic_read(&cxlrd->region_id), false);
>   	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
>   
>   	if (IS_ERR(cxlr)) {
> @@ -3930,6 +3937,10 @@ static int cxl_region_probe(struct device *dev)
>   					p->res->start, p->res->end, cxlr,
>   					is_system_ram) > 0)
>   			return 0;
> +
> +		if (!p->register_dax)
> +			return 0;

Sorry, I missed this. It should continue registering DAX if HMEM is
disabled. I will fix this in v5 and add a comment here:

-		if (!p->register_dax)
-			return 0;
+		/*
+		 * Only skip probe time DAX if HMEM will handle it
+		 * later.
+		 */
+		if (IS_ENABLED(CONFIG_DEV_DAX_HMEM) && !p->register_dax)
+			return 0;
> +
>   		return devm_cxl_add_dax_region(cxlr);
>   	default:
>   		dev_dbg(&cxlr->dev, "unsupported region mode: %d\n",
> diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
> index af78c9fd37f2..324220596890 100644
> --- a/drivers/cxl/cxl.h
> +++ b/drivers/cxl/cxl.h
> @@ -495,6 +495,7 @@ struct cxl_region_params {
>   	struct cxl_endpoint_decoder *targets[CXL_DECODER_MAX_INTERLEAVE];
>   	int nr_targets;
>   	resource_size_t cache_size;
> +	bool register_dax;
>   };
>   
>   enum cxl_partition_mode {