[PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready

Smita Koralahalli posted 6 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Smita Koralahalli 1 month, 1 week ago
Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
instead of the iomem_resource tree at boot.

Publishing Soft Reserved ranges into iomem too early causes conflicts with
CXL hotplug and region assembly failure, especially when Soft Reserved
overlaps CXL regions.

Re-inserting these ranges into iomem will be handled in follow-up patches,
after ensuring CXL window publication ordering is stabilized and when the
dax_hmem is ready to consume them.

This avoids trimming or deleting resources later and provides a cleaner
handoff between EFI-defined memory and CXL resource management.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/kernel/e820.c    |  2 +-
 drivers/dax/hmem/device.c |  4 +--
 drivers/dax/hmem/hmem.c   |  8 +++++
 include/linux/ioport.h    | 24 +++++++++++++
 kernel/resource.c         | 73 +++++++++++++++++++++++++++++++++------
 5 files changed, 97 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index c3acbd26408b..aef1ff2cabda 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
 	res = e820_res;
 	for (i = 0; i < e820_table->nr_entries; i++) {
 		if (!res->parent && res->end)
-			insert_resource_expand_to_fit(&iomem_resource, res);
+			insert_resource_late(res);
 		res++;
 	}
 
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index f9e1a76a04a9..22732b729017 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -83,8 +83,8 @@ static __init int hmem_register_one(struct resource *res, void *data)
 
 static __init int hmem_init(void)
 {
-	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
-			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
+	walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
+				   -1, NULL, hmem_register_one);
 	return 0;
 }
 
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index c18451a37e4f..d5b8f06d531e 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -73,10 +73,18 @@ static int hmem_register_device(struct device *host, int target_nid,
 		return 0;
 	}
 
+#ifdef CONFIG_EFI_SOFT_RESERVE
+	rc = region_intersects_soft_reserve(res->start, resource_size(res),
+					    IORESOURCE_MEM,
+					    IORES_DESC_SOFT_RESERVED);
+	if (rc != REGION_INTERSECTS)
+		return 0;
+#else
 	rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
 			       IORES_DESC_SOFT_RESERVED);
 	if (rc != REGION_INTERSECTS)
 		return 0;
+#endif
 
 	id = memregion_alloc(GFP_KERNEL);
 	if (id < 0) {
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index e8b2d6aa4013..889bc4982777 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -232,6 +232,9 @@ struct resource_constraint {
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
+#ifdef CONFIG_EFI_SOFT_RESERVE
+extern struct resource soft_reserve_resource;
+#endif
 
 extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
 extern int request_resource(struct resource *root, struct resource *new);
@@ -255,6 +258,22 @@ int adjust_resource(struct resource *res, resource_size_t start,
 		    resource_size_t size);
 resource_size_t resource_alignment(struct resource *res);
 
+
+#ifdef CONFIG_EFI_SOFT_RESERVE
+static inline void insert_resource_late(struct resource *new)
+{
+	if (new->desc == IORES_DESC_SOFT_RESERVED)
+		insert_resource_expand_to_fit(&soft_reserve_resource, new);
+	else
+		insert_resource_expand_to_fit(&iomem_resource, new);
+}
+#else
+static inline void insert_resource_late(struct resource *new)
+{
+	insert_resource_expand_to_fit(&iomem_resource, new);
+}
+#endif
+
 /**
  * resource_set_size - Calculate resource end address from size and start
  * @res: Resource descriptor
@@ -409,6 +428,11 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
 extern int
 walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
 		    void *arg, int (*func)(struct resource *, void *));
+int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
+			       u64 start, u64 end, void *arg,
+			       int (*func)(struct resource *, void *));
+int region_intersects_soft_reserve(resource_size_t start, size_t size,
+				   unsigned long flags, unsigned long desc);
 
 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
diff --git a/kernel/resource.c b/kernel/resource.c
index f9bb5481501a..8479a99441e2 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -321,13 +321,14 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
 }
 
 /**
- * find_next_iomem_res - Finds the lowest iomem resource that covers part of
- *			 [@start..@end].
+ * find_next_res - Finds the lowest resource that covers part of
+ *		   [@start..@end].
  *
  * If a resource is found, returns 0 and @*res is overwritten with the part
  * of the resource that's within [@start..@end]; if none is found, returns
  * -ENODEV.  Returns -EINVAL for invalid parameters.
  *
+ * @parent:	resource tree root to search
  * @start:	start address of the resource searched for
  * @end:	end address of same resource
  * @flags:	flags which the resource must have
@@ -337,9 +338,9 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
  * The caller must specify @start, @end, @flags, and @desc
  * (which may be IORES_DESC_NONE).
  */
-static int find_next_iomem_res(resource_size_t start, resource_size_t end,
-			       unsigned long flags, unsigned long desc,
-			       struct resource *res)
+static int find_next_res(struct resource *parent, resource_size_t start,
+			 resource_size_t end, unsigned long flags,
+			 unsigned long desc, struct resource *res)
 {
 	struct resource *p;
 
@@ -351,7 +352,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
 
 	read_lock(&resource_lock);
 
-	for_each_resource(&iomem_resource, p, false) {
+	for_each_resource(parent, p, false) {
 		/* If we passed the resource we are looking for, stop */
 		if (p->start > end) {
 			p = NULL;
@@ -382,16 +383,23 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
 	return p ? 0 : -ENODEV;
 }
 
-static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
-				 unsigned long flags, unsigned long desc,
-				 void *arg,
-				 int (*func)(struct resource *, void *))
+static int find_next_iomem_res(resource_size_t start, resource_size_t end,
+			       unsigned long flags, unsigned long desc,
+			       struct resource *res)
+{
+	return find_next_res(&iomem_resource, start, end, flags, desc, res);
+}
+
+static int walk_res_desc(struct resource *parent, resource_size_t start,
+			 resource_size_t end, unsigned long flags,
+			 unsigned long desc, void *arg,
+			 int (*func)(struct resource *, void *))
 {
 	struct resource res;
 	int ret = -EINVAL;
 
 	while (start < end &&
-	       !find_next_iomem_res(start, end, flags, desc, &res)) {
+	       !find_next_res(parent, start, end, flags, desc, &res)) {
 		ret = (*func)(&res, arg);
 		if (ret)
 			break;
@@ -402,6 +410,15 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
 	return ret;
 }
 
+static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
+				 unsigned long flags, unsigned long desc,
+				 void *arg,
+				 int (*func)(struct resource *, void *))
+{
+	return walk_res_desc(&iomem_resource, start, end, flags, desc, arg, func);
+}
+
+
 /**
  * walk_iomem_res_desc - Walks through iomem resources and calls func()
  *			 with matching resource ranges.
@@ -426,6 +443,26 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
 }
 EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
 
+#ifdef CONFIG_EFI_SOFT_RESERVE
+struct resource soft_reserve_resource = {
+	.name	= "Soft Reserved",
+	.start	= 0,
+	.end	= -1,
+	.desc	= IORES_DESC_SOFT_RESERVED,
+	.flags	= IORESOURCE_MEM,
+};
+EXPORT_SYMBOL_GPL(soft_reserve_resource);
+
+int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
+			       u64 start, u64 end, void *arg,
+			       int (*func)(struct resource *, void *))
+{
+	return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
+			     arg, func);
+}
+EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
+#endif
+
 /*
  * This function calls the @func callback against all memory ranges of type
  * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
@@ -648,6 +685,20 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
 }
 EXPORT_SYMBOL_GPL(region_intersects);
 
+int region_intersects_soft_reserve(resource_size_t start, size_t size,
+				   unsigned long flags, unsigned long desc)
+{
+	int ret;
+
+	read_lock(&resource_lock);
+	ret = __region_intersects(&soft_reserve_resource, start, size, flags,
+				  desc);
+	read_unlock(&resource_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(region_intersects_soft_reserve);
+
 void __weak arch_remove_reservations(struct resource *avail)
 {
 }
-- 
2.17.1
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Borislav Petkov 3 weeks, 3 days ago
On Fri, Aug 22, 2025 at 03:41:57AM +0000, Smita Koralahalli wrote:
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index c3acbd26408b..aef1ff2cabda 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
>  	res = e820_res;
>  	for (i = 0; i < e820_table->nr_entries; i++) {
>  		if (!res->parent && res->end)
> -			insert_resource_expand_to_fit(&iomem_resource, res);
> +			insert_resource_late(res);
>  		res++;
>  	}
>

Btw, this doesn't even build, and the cover letter doesn't say what it applies
on top of, so I applied it on my pile of tip/master.

kernel/resource.c: In function ‘region_intersects_soft_reserve’:
kernel/resource.c:694:36: error: ‘soft_reserve_resource’ undeclared (first use in this function); did you mean ‘devm_release_resource’?
  694 |         ret = __region_intersects(&soft_reserve_resource, start, size, flags,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
      |                                    devm_release_resource
kernel/resource.c:694:36: note: each undeclared identifier is reported only once for each function it appears in
make[3]: *** [scripts/Makefile.build:287: kernel/resource.o] Error 1
make[2]: *** [scripts/Makefile.build:556: kernel] Error 2
make[2]: *** Waiting for unfinished jobs....
make[1]: *** [/mnt/kernel/kernel/2nd/linux/Makefile:2011: .] Error 2
make: *** [Makefile:248: __sub-make] Error 2

Also, I'd do this resource insertion a bit differently:

insert_resource_expand_to_fit(struct resource *new)
{
	struct resource *root = &iomem_resource;

	if (new->desc == IORES_DESC_SOFT_RESERVED)
		root = &soft_reserve_resource;

	return __insert_resource_expand_to_fit(root, new);
}

and rename the current insert_resource_expand_to_fit() to the __ variant.

It looks like you want to intercept all callers of
insert_resource_expand_to_fit() instead of defining a separate set which works
on the soft-reserve thing.

Oh well, the resource code is yucky already.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Koralahalli Channabasappa, Smita 3 days, 19 hours ago
Hi Boris,

On 9/9/2025 9:12 AM, Borislav Petkov wrote:
> On Fri, Aug 22, 2025 at 03:41:57AM +0000, Smita Koralahalli wrote:
>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
>> index c3acbd26408b..aef1ff2cabda 100644
>> --- a/arch/x86/kernel/e820.c
>> +++ b/arch/x86/kernel/e820.c
>> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
>>   	res = e820_res;
>>   	for (i = 0; i < e820_table->nr_entries; i++) {
>>   		if (!res->parent && res->end)
>> -			insert_resource_expand_to_fit(&iomem_resource, res);
>> +			insert_resource_late(res);
>>   		res++;
>>   	}
>>
> 
> Btw, this doesn't even build, and the cover letter doesn't say what it applies
> on top of, so I applied it on my pile of tip/master.
> 
> kernel/resource.c: In function ‘region_intersects_soft_reserve’:
> kernel/resource.c:694:36: error: ‘soft_reserve_resource’ undeclared (first use in this function); did you mean ‘devm_release_resource’?
>    694 |         ret = __region_intersects(&soft_reserve_resource, start, size, flags,
>        |                                    ^~~~~~~~~~~~~~~~~~~~~
>        |                                    devm_release_resource
> kernel/resource.c:694:36: note: each undeclared identifier is reported only once for each function it appears in
> make[3]: *** [scripts/Makefile.build:287: kernel/resource.o] Error 1
> make[2]: *** [scripts/Makefile.build:556: kernel] Error 2
> make[2]: *** Waiting for unfinished jobs....
> make[1]: *** [/mnt/kernel/kernel/2nd/linux/Makefile:2011: .] Error 2
> make: *** [Makefile:248: __sub-make] Error 2

Apologies for the delay.

This was based on mainline. I have rebased the series onto the latest 
mainline and sent out a new revision and noted it in the cover letter.

https://lore.kernel.org/all/20250930044757.214798-2-Smita.KoralahalliChannabasappa@amd.com/

> 
> Also, I'd do this resource insertion a bit differently:
> 
> insert_resource_expand_to_fit(struct resource *new)
> {
> 	struct resource *root = &iomem_resource;
> 
> 	if (new->desc == IORES_DESC_SOFT_RESERVED)
> 		root = &soft_reserve_resource;
> 
> 	return __insert_resource_expand_to_fit(root, new);
> }
> 
> and rename the current insert_resource_expand_to_fit() to the __ variant.

I have made these changes as well.

Thanks
Smita

> 
> It looks like you want to intercept all callers of
> insert_resource_expand_to_fit() instead of defining a separate set which works
> on the soft-reserve thing.
> 
> Oh well, the resource code is yucky already.
> 

Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by dan.j.williams@intel.com 3 weeks, 4 days ago
[ add Boris and Ard ]

Ard, Boris, can you have a look at the touches to early e820/x86 init
(insert_resource_late()) and give an ack (or nak). The general problem
here is conflicts between e820 memory resources and CXL subsystem memory
resources.

Smita Koralahalli wrote:
> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
> instead of the iomem_resource tree at boot.
> 
> Publishing Soft Reserved ranges into iomem too early causes conflicts with
> CXL hotplug and region assembly failure, especially when Soft Reserved
> overlaps CXL regions.
> 
> Re-inserting these ranges into iomem will be handled in follow-up patches,
> after ensuring CXL window publication ordering is stabilized and when the
> dax_hmem is ready to consume them.
> 
> This avoids trimming or deleting resources later and provides a cleaner
> handoff between EFI-defined memory and CXL resource management.
> 
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>


> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Smita, if you added changes this should have Co-developed-by. Otherwise
plain Signed-off-by is interpreted as only chain of custody.  Any other
patches that you add my Signed-off-by to should also have
Co-developed-by or be From: me.

Alternatively, if you completely rewrite a patch with your own approach
then note the source (with a Link:) and leave off the original SOB.

Lastly, in this case it looks unmodified from what I wrote? Then it
should be:

From: Dan Williams <dan.j.williams@intel.com>

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>

...to show the chain of custody of you forwarding a diff authored
completely by someone else.
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Koralahalli Channabasappa, Smita 3 weeks, 1 day ago
On 9/8/2025 4:01 PM, dan.j.williams@intel.com wrote:
> [ add Boris and Ard ]
> 
> Ard, Boris, can you have a look at the touches to early e820/x86 init
> (insert_resource_late()) and give an ack (or nak). The general problem
> here is conflicts between e820 memory resources and CXL subsystem memory
> resources.
> 
> Smita Koralahalli wrote:
>> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
>> instead of the iomem_resource tree at boot.
>>
>> Publishing Soft Reserved ranges into iomem too early causes conflicts with
>> CXL hotplug and region assembly failure, especially when Soft Reserved
>> overlaps CXL regions.
>>
>> Re-inserting these ranges into iomem will be handled in follow-up patches,
>> after ensuring CXL window publication ordering is stabilized and when the
>> dax_hmem is ready to consume them.
>>
>> This avoids trimming or deleting resources later and provides a cleaner
>> handoff between EFI-defined memory and CXL resource management.
>>
>> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> 
> 
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> 
> Smita, if you added changes this should have Co-developed-by. Otherwise
> plain Signed-off-by is interpreted as only chain of custody.  Any other
> patches that you add my Signed-off-by to should also have
> Co-developed-by or be From: me.
> 
> Alternatively, if you completely rewrite a patch with your own approach
> then note the source (with a Link:) and leave off the original SOB.
> 
> Lastly, in this case it looks unmodified from what I wrote? Then it
> should be:
> 
> From: Dan Williams <dan.j.williams@intel.com>
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> 
> ...to show the chain of custody of you forwarding a diff authored
> completely by someone else.

Thanks for clarifying, Dan. I wasn’t aware of the distinction before
(especially with regard to handling the chain of custody). I will update to
reflect that properly and will also be careful with how I handle authorship
and sign-offs in future submissions.

Thanks
Smita
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by dan.j.williams@intel.com 1 week, 1 day ago
Koralahalli Channabasappa, Smita wrote:
> On 9/8/2025 4:01 PM, dan.j.williams@intel.com wrote:
> > [ add Boris and Ard ]
> > 
> > Ard, Boris, can you have a look at the touches to early e820/x86 init
> > (insert_resource_late()) and give an ack (or nak). The general problem
> > here is conflicts between e820 memory resources and CXL subsystem memory
> > resources.
> > 
> > Smita Koralahalli wrote:
> >> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
> >> instead of the iomem_resource tree at boot.
> >>
> >> Publishing Soft Reserved ranges into iomem too early causes conflicts with
> >> CXL hotplug and region assembly failure, especially when Soft Reserved
> >> overlaps CXL regions.
> >>
> >> Re-inserting these ranges into iomem will be handled in follow-up patches,
> >> after ensuring CXL window publication ordering is stabilized and when the
> >> dax_hmem is ready to consume them.
> >>
> >> This avoids trimming or deleting resources later and provides a cleaner
> >> handoff between EFI-defined memory and CXL resource management.
> >>
> >> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> > 
> > 
> >> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > 
> > Smita, if you added changes this should have Co-developed-by. Otherwise
> > plain Signed-off-by is interpreted as only chain of custody.  Any other
> > patches that you add my Signed-off-by to should also have
> > Co-developed-by or be From: me.
> > 
> > Alternatively, if you completely rewrite a patch with your own approach
> > then note the source (with a Link:) and leave off the original SOB.
> > 
> > Lastly, in this case it looks unmodified from what I wrote? Then it
> > should be:
> > 
> > From: Dan Williams <dan.j.williams@intel.com>
> > 
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> > 
> > ...to show the chain of custody of you forwarding a diff authored
> > completely by someone else.
> 
> Thanks for clarifying, Dan. I wasn’t aware of the distinction before 
> (especially to handle the chain of custody..). I will update to reflect 
> that properly and will also be careful with how I handle authorship and 
> sign-offs in future submissions.

Yeah, the choice to replace From: is subjective and I usually reserve
that for significant rewrites. If any of the original patch remains then
add Co-developed-by + Signed-off-by. If none of the original patch
remains I do still like to say:

"based on an original patch by ..." with a Link: to that inspiration
patch.

...and if you just forward the original, keep From: untouched and just
add your own Signed-off-by as documented in
Documentation/process/submitting-patches.rst.
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Zhijian Li (Fujitsu) 1 month ago

On 22/08/2025 11:41, Smita Koralahalli wrote:
> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
> instead of the iomem_resource tree at boot.
> 
> Publishing Soft Reserved ranges into iomem too early causes conflicts with
> CXL hotplug and region assembly failure, especially when Soft Reserved
> overlaps CXL regions.
> 
> Re-inserting these ranges into iomem will be handled in follow-up patches,
> after ensuring CXL window publication ordering is stabilized and when the
> dax_hmem is ready to consume them.
> 
> This avoids trimming or deleting resources later and provides a cleaner
> handoff between EFI-defined memory and CXL resource management.
> 
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>   arch/x86/kernel/e820.c    |  2 +-
>   drivers/dax/hmem/device.c |  4 +--
>   drivers/dax/hmem/hmem.c   |  8 +++++
>   include/linux/ioport.h    | 24 +++++++++++++
>   kernel/resource.c         | 73 +++++++++++++++++++++++++++++++++------
>   5 files changed, 97 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index c3acbd26408b..aef1ff2cabda 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
>   	res = e820_res;
>   	for (i = 0; i < e820_table->nr_entries; i++) {
>   		if (!res->parent && res->end)
> -			insert_resource_expand_to_fit(&iomem_resource, res);
> +			insert_resource_late(res);
>   		res++;
>   	}
>   
> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
> index f9e1a76a04a9..22732b729017 100644
> --- a/drivers/dax/hmem/device.c
> +++ b/drivers/dax/hmem/device.c
> @@ -83,8 +83,8 @@ static __init int hmem_register_one(struct resource *res, void *data)
>   
>   static __init int hmem_init(void)
>   {
> -	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
> -			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
> +	walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
> +				   -1, NULL, hmem_register_one);
>   	return 0;
>   }
>   
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index c18451a37e4f..d5b8f06d531e 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -73,10 +73,18 @@ static int hmem_register_device(struct device *host, int target_nid,
>   		return 0;
>   	}
>   
> +#ifdef CONFIG_EFI_SOFT_RESERVE


Note that dax_hmem currently depends on CONFIG_EFI_SOFT_RESERVE, so this conditional check may be redundant.



> +	rc = region_intersects_soft_reserve(res->start, resource_size(res),
> +					    IORESOURCE_MEM,
> +					    IORES_DESC_SOFT_RESERVED);
> +	if (rc != REGION_INTERSECTS)
> +		return 0;
> +#else
>   	rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
>   			       IORES_DESC_SOFT_RESERVED);
>   	if (rc != REGION_INTERSECTS)
>   		return 0;
> +#endif
>   

Additionally, please add a TODO note here (e.g., "Add soft-reserved memory back to iomem").


>   	id = memregion_alloc(GFP_KERNEL);
>   	if (id < 0) {
> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> index e8b2d6aa4013..889bc4982777 100644
> --- a/include/linux/ioport.h
> +++ b/include/linux/ioport.h
> @@ -232,6 +232,9 @@ struct resource_constraint {
>   /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
>   extern struct resource ioport_resource;
>   extern struct resource iomem_resource;
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +extern struct resource soft_reserve_resource;
> +#endif
>   
>   extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
>   extern int request_resource(struct resource *root, struct resource *new);
> @@ -255,6 +258,22 @@ int adjust_resource(struct resource *res, resource_size_t start,
>   		    resource_size_t size);
>   resource_size_t resource_alignment(struct resource *res);
>   
> +
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +static inline void insert_resource_late(struct resource *new)
> +{
> +	if (new->desc == IORES_DESC_SOFT_RESERVED)
> +		insert_resource_expand_to_fit(&soft_reserve_resource, new);
> +	else
> +		insert_resource_expand_to_fit(&iomem_resource, new);
> +}
> +#else
> +static inline void insert_resource_late(struct resource *new)
> +{
> +	insert_resource_expand_to_fit(&iomem_resource, new);
> +}
> +#endif
> +
>   /**
>    * resource_set_size - Calculate resource end address from size and start
>    * @res: Resource descriptor
> @@ -409,6 +428,11 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
>   extern int
>   walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
>   		    void *arg, int (*func)(struct resource *, void *));
> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
> +			       u64 start, u64 end, void *arg,
> +			       int (*func)(struct resource *, void *));
> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
> +				   unsigned long flags, unsigned long desc);
>   
>   struct resource *devm_request_free_mem_region(struct device *dev,
>   		struct resource *base, unsigned long size);
> diff --git a/kernel/resource.c b/kernel/resource.c
> index f9bb5481501a..8479a99441e2 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -321,13 +321,14 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
>   }
>   
>   /**
> - * find_next_iomem_res - Finds the lowest iomem resource that covers part of
> - *			 [@start..@end].
> + * find_next_res - Finds the lowest resource that covers part of
> + *		   [@start..@end].
>    *
>    * If a resource is found, returns 0 and @*res is overwritten with the part
>    * of the resource that's within [@start..@end]; if none is found, returns
>    * -ENODEV.  Returns -EINVAL for invalid parameters.
>    *
> + * @parent:	resource tree root to search
>    * @start:	start address of the resource searched for
>    * @end:	end address of same resource
>    * @flags:	flags which the resource must have
> @@ -337,9 +338,9 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
>    * The caller must specify @start, @end, @flags, and @desc
>    * (which may be IORES_DESC_NONE).
>    */
> -static int find_next_iomem_res(resource_size_t start, resource_size_t end,
> -			       unsigned long flags, unsigned long desc,
> -			       struct resource *res)
> +static int find_next_res(struct resource *parent, resource_size_t start,
> +			 resource_size_t end, unsigned long flags,
> +			 unsigned long desc, struct resource *res)
>   {
>   	struct resource *p;
>   
> @@ -351,7 +352,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>   
>   	read_lock(&resource_lock);
>   
> -	for_each_resource(&iomem_resource, p, false) {
> +	for_each_resource(parent, p, false) {
>   		/* If we passed the resource we are looking for, stop */
>   		if (p->start > end) {
>   			p = NULL;
> @@ -382,16 +383,23 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>   	return p ? 0 : -ENODEV;
>   }
>   
> -static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
> -				 unsigned long flags, unsigned long desc,
> -				 void *arg,
> -				 int (*func)(struct resource *, void *))
> +static int find_next_iomem_res(resource_size_t start, resource_size_t end,
> +			       unsigned long flags, unsigned long desc,
> +			       struct resource *res)
> +{
> +	return find_next_res(&iomem_resource, start, end, flags, desc, res);
> +}
> +
> +static int walk_res_desc(struct resource *parent, resource_size_t start,
> +			 resource_size_t end, unsigned long flags,
> +			 unsigned long desc, void *arg,
> +			 int (*func)(struct resource *, void *))
>   {
>   	struct resource res;
>   	int ret = -EINVAL;
>   
>   	while (start < end &&
> -	       !find_next_iomem_res(start, end, flags, desc, &res)) {
> +	       !find_next_res(parent, start, end, flags, desc, &res)) {
>   		ret = (*func)(&res, arg);
>   		if (ret)
>   			break;
> @@ -402,6 +410,15 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
>   	return ret;
>   }
>   
> +static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
> +				 unsigned long flags, unsigned long desc,
> +				 void *arg,
> +				 int (*func)(struct resource *, void *))
> +{
> +	return walk_res_desc(&iomem_resource, start, end, flags, desc, arg, func);
> +}
> +
> +
>   /**
>    * walk_iomem_res_desc - Walks through iomem resources and calls func()
>    *			 with matching resource ranges.
> @@ -426,6 +443,26 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
>   }
>   EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
>   
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +struct resource soft_reserve_resource = {
> +	.name	= "Soft Reserved",
> +	.start	= 0,
> +	.end	= -1,
> +	.desc	= IORES_DESC_SOFT_RESERVED,
> +	.flags	= IORESOURCE_MEM,
> +};
> +EXPORT_SYMBOL_GPL(soft_reserve_resource);
> +
> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
> +			       u64 start, u64 end, void *arg,
> +			       int (*func)(struct resource *, void *))
> +{
> +	return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
> +			     arg, func);
> +}
> +EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
> +#endif
> +
>   /*
>    * This function calls the @func callback against all memory ranges of type
>    * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
> @@ -648,6 +685,20 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
>   }
>   EXPORT_SYMBOL_GPL(region_intersects);
>   
> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
> +				   unsigned long flags, unsigned long desc)


Shouldn't this function be implemented under `#ifdef CONFIG_EFI_SOFT_RESERVE`? Otherwise it may cause compilation failures when the config is disabled.


Thanks
Zhijian
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Koralahalli Channabasappa, Smita 3 days, 20 hours ago
Hi Zhijian,

Sorry for the delay here.

On 8/31/2025 7:59 PM, Zhijian Li (Fujitsu) wrote:
> 
> 
> On 22/08/2025 11:41, Smita Koralahalli wrote:
>> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
>> instead of the iomem_resource tree at boot.
>>
>> Publishing Soft Reserved ranges into iomem too early causes conflicts with
>> CXL hotplug and region assembly failure, especially when Soft Reserved
>> overlaps CXL regions.
>>
>> Re-inserting these ranges into iomem will be handled in follow-up patches,
>> after ensuring CXL window publication ordering is stabilized and when the
>> dax_hmem is ready to consume them.
>>
>> This avoids trimming or deleting resources later and provides a cleaner
>> handoff between EFI-defined memory and CXL resource management.
>>
>> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>    arch/x86/kernel/e820.c    |  2 +-
>>    drivers/dax/hmem/device.c |  4 +--
>>    drivers/dax/hmem/hmem.c   |  8 +++++
>>    include/linux/ioport.h    | 24 +++++++++++++
>>    kernel/resource.c         | 73 +++++++++++++++++++++++++++++++++------
>>    5 files changed, 97 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
>> index c3acbd26408b..aef1ff2cabda 100644
>> --- a/arch/x86/kernel/e820.c
>> +++ b/arch/x86/kernel/e820.c
>> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
>>    	res = e820_res;
>>    	for (i = 0; i < e820_table->nr_entries; i++) {
>>    		if (!res->parent && res->end)
>> -			insert_resource_expand_to_fit(&iomem_resource, res);
>> +			insert_resource_late(res);
>>    		res++;
>>    	}
>>    
>> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
>> index f9e1a76a04a9..22732b729017 100644
>> --- a/drivers/dax/hmem/device.c
>> +++ b/drivers/dax/hmem/device.c
>> @@ -83,8 +83,8 @@ static __init int hmem_register_one(struct resource *res, void *data)
>>    
>>    static __init int hmem_init(void)
>>    {
>> -	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
>> -			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
>> +	walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
>> +				   -1, NULL, hmem_register_one);
>>    	return 0;
>>    }
>>    
>> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
>> index c18451a37e4f..d5b8f06d531e 100644
>> --- a/drivers/dax/hmem/hmem.c
>> +++ b/drivers/dax/hmem/hmem.c
>> @@ -73,10 +73,18 @@ static int hmem_register_device(struct device *host, int target_nid,
>>    		return 0;
>>    	}
>>    
>> +#ifdef CONFIG_EFI_SOFT_RESERVE
> 
> 
> Note that dax_kmem currently depends on CONFIG_EFI_SOFT_RESERVED, so this conditional check may be redundant.

Removed in v2.

> 
> 
> 
>> +	rc = region_intersects_soft_reserve(res->start, resource_size(res),
>> +					    IORESOURCE_MEM,
>> +					    IORES_DESC_SOFT_RESERVED);
>> +	if (rc != REGION_INTERSECTS)
>> +		return 0;
>> +#else
>>    	rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
>>    			       IORES_DESC_SOFT_RESERVED);
>>    	if (rc != REGION_INTERSECTS)
>>    		return 0;
>> +#endif
>>    
> 
> Additionally, please add a TODO note here (e.g., "Add soft-reserved memory back to iomem").

Added.

> 
> 
>>    	id = memregion_alloc(GFP_KERNEL);
>>    	if (id < 0) {
>> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
>> index e8b2d6aa4013..889bc4982777 100644
>> --- a/include/linux/ioport.h
>> +++ b/include/linux/ioport.h
>> @@ -232,6 +232,9 @@ struct resource_constraint {
>>    /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
>>    extern struct resource ioport_resource;
>>    extern struct resource iomem_resource;
>> +#ifdef CONFIG_EFI_SOFT_RESERVE
>> +extern struct resource soft_reserve_resource;
>> +#endif
>>    
>>    extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
>>    extern int request_resource(struct resource *root, struct resource *new);
>> @@ -255,6 +258,22 @@ int adjust_resource(struct resource *res, resource_size_t start,
>>    		    resource_size_t size);
>>    resource_size_t resource_alignment(struct resource *res);
>>    
>> +
>> +#ifdef CONFIG_EFI_SOFT_RESERVE
>> +static inline void insert_resource_late(struct resource *new)
>> +{
>> +	if (new->desc == IORES_DESC_SOFT_RESERVED)
>> +		insert_resource_expand_to_fit(&soft_reserve_resource, new);
>> +	else
>> +		insert_resource_expand_to_fit(&iomem_resource, new);
>> +}
>> +#else
>> +static inline void insert_resource_late(struct resource *new)
>> +{
>> +	insert_resource_expand_to_fit(&iomem_resource, new);
>> +}
>> +#endif
>> +
>>    /**
>>     * resource_set_size - Calculate resource end address from size and start
>>     * @res: Resource descriptor
>> @@ -409,6 +428,11 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
>>    extern int
>>    walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
>>    		    void *arg, int (*func)(struct resource *, void *));
>> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
>> +			       u64 start, u64 end, void *arg,
>> +			       int (*func)(struct resource *, void *));
>> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
>> +				   unsigned long flags, unsigned long desc);
>>    
>>    struct resource *devm_request_free_mem_region(struct device *dev,
>>    		struct resource *base, unsigned long size);
>> diff --git a/kernel/resource.c b/kernel/resource.c
>> index f9bb5481501a..8479a99441e2 100644
>> --- a/kernel/resource.c
>> +++ b/kernel/resource.c
>> @@ -321,13 +321,14 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
>>    }
>>    
>>    /**
>> - * find_next_iomem_res - Finds the lowest iomem resource that covers part of
>> - *			 [@start..@end].
>> + * find_next_res - Finds the lowest resource that covers part of
>> + *		   [@start..@end].
>>     *
>>     * If a resource is found, returns 0 and @*res is overwritten with the part
>>     * of the resource that's within [@start..@end]; if none is found, returns
>>     * -ENODEV.  Returns -EINVAL for invalid parameters.
>>     *
>> + * @parent:	resource tree root to search
>>     * @start:	start address of the resource searched for
>>     * @end:	end address of same resource
>>     * @flags:	flags which the resource must have
>> @@ -337,9 +338,9 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
>>     * The caller must specify @start, @end, @flags, and @desc
>>     * (which may be IORES_DESC_NONE).
>>     */
>> -static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>> -			       unsigned long flags, unsigned long desc,
>> -			       struct resource *res)
>> +static int find_next_res(struct resource *parent, resource_size_t start,
>> +			 resource_size_t end, unsigned long flags,
>> +			 unsigned long desc, struct resource *res)
>>    {
>>    	struct resource *p;
>>    
>> @@ -351,7 +352,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>>    
>>    	read_lock(&resource_lock);
>>    
>> -	for_each_resource(&iomem_resource, p, false) {
>> +	for_each_resource(parent, p, false) {
>>    		/* If we passed the resource we are looking for, stop */
>>    		if (p->start > end) {
>>    			p = NULL;
>> @@ -382,16 +383,23 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>>    	return p ? 0 : -ENODEV;
>>    }
>>    
>> -static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
>> -				 unsigned long flags, unsigned long desc,
>> -				 void *arg,
>> -				 int (*func)(struct resource *, void *))
>> +static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>> +			       unsigned long flags, unsigned long desc,
>> +			       struct resource *res)
>> +{
>> +	return find_next_res(&iomem_resource, start, end, flags, desc, res);
>> +}
>> +
>> +static int walk_res_desc(struct resource *parent, resource_size_t start,
>> +			 resource_size_t end, unsigned long flags,
>> +			 unsigned long desc, void *arg,
>> +			 int (*func)(struct resource *, void *))
>>    {
>>    	struct resource res;
>>    	int ret = -EINVAL;
>>    
>>    	while (start < end &&
>> -	       !find_next_iomem_res(start, end, flags, desc, &res)) {
>> +	       !find_next_res(parent, start, end, flags, desc, &res)) {
>>    		ret = (*func)(&res, arg);
>>    		if (ret)
>>    			break;
>> @@ -402,6 +410,15 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
>>    	return ret;
>>    }
>>    
>> +static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
>> +				 unsigned long flags, unsigned long desc,
>> +				 void *arg,
>> +				 int (*func)(struct resource *, void *))
>> +{
>> +	return walk_res_desc(&iomem_resource, start, end, flags, desc, arg, func);
>> +}
>> +
>> +
>>    /**
>>     * walk_iomem_res_desc - Walks through iomem resources and calls func()
>>     *			 with matching resource ranges.
>> @@ -426,6 +443,26 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
>>    }
>>    EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
>>    
>> +#ifdef CONFIG_EFI_SOFT_RESERVE
>> +struct resource soft_reserve_resource = {
>> +	.name	= "Soft Reserved",
>> +	.start	= 0,
>> +	.end	= -1,
>> +	.desc	= IORES_DESC_SOFT_RESERVED,
>> +	.flags	= IORESOURCE_MEM,
>> +};
>> +EXPORT_SYMBOL_GPL(soft_reserve_resource);
>> +
>> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
>> +			       u64 start, u64 end, void *arg,
>> +			       int (*func)(struct resource *, void *))
>> +{
>> +	return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
>> +			     arg, func);
>> +}
>> +EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
>> +#endif
>> +
>>    /*
>>     * This function calls the @func callback against all memory ranges of type
>>     * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
>> @@ -648,6 +685,20 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
>>    }
>>    EXPORT_SYMBOL_GPL(region_intersects);
>>    
>> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
>> +				   unsigned long flags, unsigned long desc)
> 
> 
> Shouldn't this function be implemented under `#if CONFIG_EFI_SOFT_RESERVE`? Otherwise it may cause compilation failures when the config is disabled.

Fixed it.

Thanks
Smita
> 
> Thanks
> Zhijian