[PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready

Smita Koralahalli posted 6 patches 5 months, 3 weeks ago
There is a newer version of this series
[PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Smita Koralahalli 5 months, 3 weeks ago
Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
instead of the iomem_resource tree at boot.

Publishing Soft Reserved ranges into iomem too early causes conflicts with
CXL hotplug and region assembly failure, especially when Soft Reserved
overlaps CXL regions.

Re-inserting these ranges into iomem will be handled in follow-up patches,
after ensuring CXL window publication ordering is stabilized and when the
dax_hmem is ready to consume them.

This avoids trimming or deleting resources later and provides a cleaner
handoff between EFI-defined memory and CXL resource management.

Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/x86/kernel/e820.c    |  2 +-
 drivers/dax/hmem/device.c |  4 +--
 drivers/dax/hmem/hmem.c   |  8 +++++
 include/linux/ioport.h    | 24 +++++++++++++
 kernel/resource.c         | 73 +++++++++++++++++++++++++++++++++------
 5 files changed, 97 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index c3acbd26408b..aef1ff2cabda 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
 	res = e820_res;
 	for (i = 0; i < e820_table->nr_entries; i++) {
 		if (!res->parent && res->end)
-			insert_resource_expand_to_fit(&iomem_resource, res);
+			insert_resource_late(res);
 		res++;
 	}
 
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index f9e1a76a04a9..22732b729017 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -83,8 +83,8 @@ static __init int hmem_register_one(struct resource *res, void *data)
 
 static __init int hmem_init(void)
 {
-	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
-			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
+	walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
+				   -1, NULL, hmem_register_one);
 	return 0;
 }
 
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index c18451a37e4f..d5b8f06d531e 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -73,10 +73,18 @@ static int hmem_register_device(struct device *host, int target_nid,
 		return 0;
 	}
 
+#ifdef CONFIG_EFI_SOFT_RESERVE
+	rc = region_intersects_soft_reserve(res->start, resource_size(res),
+					    IORESOURCE_MEM,
+					    IORES_DESC_SOFT_RESERVED);
+	if (rc != REGION_INTERSECTS)
+		return 0;
+#else
 	rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
 			       IORES_DESC_SOFT_RESERVED);
 	if (rc != REGION_INTERSECTS)
 		return 0;
+#endif
 
 	id = memregion_alloc(GFP_KERNEL);
 	if (id < 0) {
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index e8b2d6aa4013..889bc4982777 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -232,6 +232,9 @@ struct resource_constraint {
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
+#ifdef CONFIG_EFI_SOFT_RESERVE
+extern struct resource soft_reserve_resource;
+#endif
 
 extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
 extern int request_resource(struct resource *root, struct resource *new);
@@ -255,6 +258,22 @@ int adjust_resource(struct resource *res, resource_size_t start,
 		    resource_size_t size);
 resource_size_t resource_alignment(struct resource *res);
 
+
+#ifdef CONFIG_EFI_SOFT_RESERVE
+static inline void insert_resource_late(struct resource *new)
+{
+	if (new->desc == IORES_DESC_SOFT_RESERVED)
+		insert_resource_expand_to_fit(&soft_reserve_resource, new);
+	else
+		insert_resource_expand_to_fit(&iomem_resource, new);
+}
+#else
+static inline void insert_resource_late(struct resource *new)
+{
+	insert_resource_expand_to_fit(&iomem_resource, new);
+}
+#endif
+
 /**
  * resource_set_size - Calculate resource end address from size and start
  * @res: Resource descriptor
@@ -409,6 +428,11 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
 extern int
 walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
 		    void *arg, int (*func)(struct resource *, void *));
+int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
+			       u64 start, u64 end, void *arg,
+			       int (*func)(struct resource *, void *));
+int region_intersects_soft_reserve(resource_size_t start, size_t size,
+				   unsigned long flags, unsigned long desc);
 
 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
diff --git a/kernel/resource.c b/kernel/resource.c
index f9bb5481501a..8479a99441e2 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -321,13 +321,14 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
 }
 
 /**
- * find_next_iomem_res - Finds the lowest iomem resource that covers part of
- *			 [@start..@end].
+ * find_next_res - Finds the lowest resource that covers part of
+ *		   [@start..@end].
  *
  * If a resource is found, returns 0 and @*res is overwritten with the part
  * of the resource that's within [@start..@end]; if none is found, returns
  * -ENODEV.  Returns -EINVAL for invalid parameters.
  *
+ * @parent:	resource tree root to search
  * @start:	start address of the resource searched for
  * @end:	end address of same resource
  * @flags:	flags which the resource must have
@@ -337,9 +338,9 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
  * The caller must specify @start, @end, @flags, and @desc
  * (which may be IORES_DESC_NONE).
  */
-static int find_next_iomem_res(resource_size_t start, resource_size_t end,
-			       unsigned long flags, unsigned long desc,
-			       struct resource *res)
+static int find_next_res(struct resource *parent, resource_size_t start,
+			 resource_size_t end, unsigned long flags,
+			 unsigned long desc, struct resource *res)
 {
 	struct resource *p;
 
@@ -351,7 +352,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
 
 	read_lock(&resource_lock);
 
-	for_each_resource(&iomem_resource, p, false) {
+	for_each_resource(parent, p, false) {
 		/* If we passed the resource we are looking for, stop */
 		if (p->start > end) {
 			p = NULL;
@@ -382,16 +383,23 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
 	return p ? 0 : -ENODEV;
 }
 
-static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
-				 unsigned long flags, unsigned long desc,
-				 void *arg,
-				 int (*func)(struct resource *, void *))
+static int find_next_iomem_res(resource_size_t start, resource_size_t end,
+			       unsigned long flags, unsigned long desc,
+			       struct resource *res)
+{
+	return find_next_res(&iomem_resource, start, end, flags, desc, res);
+}
+
+static int walk_res_desc(struct resource *parent, resource_size_t start,
+			 resource_size_t end, unsigned long flags,
+			 unsigned long desc, void *arg,
+			 int (*func)(struct resource *, void *))
 {
 	struct resource res;
 	int ret = -EINVAL;
 
 	while (start < end &&
-	       !find_next_iomem_res(start, end, flags, desc, &res)) {
+	       !find_next_res(parent, start, end, flags, desc, &res)) {
 		ret = (*func)(&res, arg);
 		if (ret)
 			break;
@@ -402,6 +410,15 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
 	return ret;
 }
 
+static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
+				 unsigned long flags, unsigned long desc,
+				 void *arg,
+				 int (*func)(struct resource *, void *))
+{
+	return walk_res_desc(&iomem_resource, start, end, flags, desc, arg, func);
+}
+
+
 /**
  * walk_iomem_res_desc - Walks through iomem resources and calls func()
  *			 with matching resource ranges.
@@ -426,6 +443,26 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
 }
 EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
 
+#ifdef CONFIG_EFI_SOFT_RESERVE
+struct resource soft_reserve_resource = {
+	.name	= "Soft Reserved",
+	.start	= 0,
+	.end	= -1,
+	.desc	= IORES_DESC_SOFT_RESERVED,
+	.flags	= IORESOURCE_MEM,
+};
+EXPORT_SYMBOL_GPL(soft_reserve_resource);
+
+int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
+			       u64 start, u64 end, void *arg,
+			       int (*func)(struct resource *, void *))
+{
+	return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
+			     arg, func);
+}
+EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
+#endif
+
 /*
  * This function calls the @func callback against all memory ranges of type
  * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
@@ -648,6 +685,20 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
 }
 EXPORT_SYMBOL_GPL(region_intersects);
 
+int region_intersects_soft_reserve(resource_size_t start, size_t size,
+				   unsigned long flags, unsigned long desc)
+{
+	int ret;
+
+	read_lock(&resource_lock);
+	ret = __region_intersects(&soft_reserve_resource, start, size, flags,
+				  desc);
+	read_unlock(&resource_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(region_intersects_soft_reserve);
+
 void __weak arch_remove_reservations(struct resource *avail)
 {
 }
-- 
2.17.1
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Borislav Petkov 5 months ago
On Fri, Aug 22, 2025 at 03:41:57AM +0000, Smita Koralahalli wrote:
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index c3acbd26408b..aef1ff2cabda 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
>  	res = e820_res;
>  	for (i = 0; i < e820_table->nr_entries; i++) {
>  		if (!res->parent && res->end)
> -			insert_resource_expand_to_fit(&iomem_resource, res);
> +			insert_resource_late(res);
>  		res++;
>  	}
>

Btw, this doesn't even build and cover letter doesn't say what it applies
ontop so I applied it on my pile of tip/master.

kernel/resource.c: In function ‘region_intersects_soft_reserve’:
kernel/resource.c:694:36: error: ‘soft_reserve_resource’ undeclared (first use in this function); did you mean ‘devm_release_resource’?
  694 |         ret = __region_intersects(&soft_reserve_resource, start, size, flags,
      |                                    ^~~~~~~~~~~~~~~~~~~~~
      |                                    devm_release_resource
kernel/resource.c:694:36: note: each undeclared identifier is reported only once for each function it appears in
make[3]: *** [scripts/Makefile.build:287: kernel/resource.o] Error 1
make[2]: *** [scripts/Makefile.build:556: kernel] Error 2
make[2]: *** Waiting for unfinished jobs....
make[1]: *** [/mnt/kernel/kernel/2nd/linux/Makefile:2011: .] Error 2
make: *** [Makefile:248: __sub-make] Error 2

Also, I'd do this resource insertion a bit differently:

insert_resource_expand_to_fit(struct resource *new)
{
	struct resource *root = &iomem_resource;

	if (new->desc == IORES_DESC_SOFT_RESERVED)
		root = &soft_reserve_resource;

	return __insert_resource_expand_to_fit(root, new);
}

and rename the current insert_resource_expand_to_fit() to the __ variant.

It looks like you want to intercept all callers of
insert_resource_expand_to_fit() instead of defining a separate set which works
on the soft-reserve thing.

Oh well, the resource code is yucky already.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by dan.j.williams@intel.com 1 month, 3 weeks ago
Borislav Petkov wrote:
[..]
> Also, I'd do this resource insertion a bit differently:
> 
> insert_resource_expand_to_fit(struct resource *new)
> {
> 	struct resource *root = &iomem_resource;
> 
> 	if (new->desc == IORES_DESC_SOFT_RESERVED)
> 		root = &soft_reserve_resource;
> 
> 	return __insert_resource_expand_to_fit(root, new);
> }
> 
> and rename the current insert_resource_expand_to_fit() to the __ variant.
> 
> It looks like you want to intercept all callers of
> insert_resource_expand_to_fit() instead of defining a separate set which works
> on the soft-reserve thing.

Finally catching up with this feedback after a few months. In the latest
code from Smita [1] this has become:

void insert_resource_expand_to_fit(struct resource *new)
{
       struct resource *root = &iomem_resource;

#ifdef CONFIG_EFI_SOFT_RESERVE
       if (new->desc == IORES_DESC_SOFT_RESERVED)
               root = &soft_reserve_resource;
#endif

       __insert_resource_expand_to_fit(root, new);
}

[1]: http://lore.kernel.org/20251120031925.87762-1-Smita.KoralahalliChannabasappa@amd.com

I am uncomfortable with insert_resource_expand_to_fit() having
build-conditional semantics. However, I agree with you that my initial
wrapping attempt with insert_resource_late() was not much better.

When someone revisits this code in 10 years I doubt they understand what
this redirect is doing. So, make the deferral explicit, open-coded, and
with some commentary in the only function where the distinction matters,
e820__reserve_resources_late().

Proposed incremental on top of v4 rebased to v6.19-rc1 that also
unconditionally defines 'soft_reserve_resource', drops some redundant
arguments to helper functions, and sprinkles more commentary about this
mechanism.

I will go ahead and pull this into the for-7.0/cxl-init topic branch.
Holler if any of this looks broken.

-- 8< --
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 61bf47553f0e..95662b2fb458 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -232,6 +232,7 @@ struct resource_constraint {
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
+extern struct resource soft_reserve_resource;
 
 extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
 extern int request_resource(struct resource *root, struct resource *new);
@@ -242,8 +243,7 @@ extern void reserve_region_with_split(struct resource *root,
 			     const char *name);
 extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new);
 extern int insert_resource(struct resource *parent, struct resource *new);
-extern void __insert_resource_expand_to_fit(struct resource *root, struct resource *new);
-extern void insert_resource_expand_to_fit(struct resource *new);
+extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new);
 extern int remove_resource(struct resource *old);
 extern void arch_remove_reservations(struct resource *avail);
 extern int allocate_resource(struct resource *root, struct resource *new,
@@ -419,13 +419,10 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
 extern int
 walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
 		    void *arg, int (*func)(struct resource *, void *));
+extern int walk_soft_reserve_res(u64 start, u64 end, void *arg,
+				 int (*func)(struct resource *, void *));
 extern int
-walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
-			   u64 start, u64 end, void *arg,
-			   int (*func)(struct resource *, void *));
-extern int
-region_intersects_soft_reserve(resource_size_t start, size_t size,
-			       unsigned long flags, unsigned long desc);
+region_intersects_soft_reserve(resource_size_t start, size_t size);
 
 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0df7d9bacf82..69c050f50e18 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1151,11 +1151,18 @@ void __init e820__reserve_resources_late(void)
 	int i;
 	struct resource *res;
 
-	res = e820_res;
-	for (i = 0; i < e820_table->nr_entries; i++) {
-		if (!res->parent && res->end)
-			insert_resource_expand_to_fit(res);
-		res++;
+	for (i = 0, res = e820_res; i < e820_table->nr_entries; i++, res++) {
+		/* skip added or uninitialized resources */
+		if (res->parent || !res->end)
+			continue;
+
+		/* set aside soft-reserved resources for driver consideration */
+		if (res->desc == IORES_DESC_SOFT_RESERVED) {
+			insert_resource_expand_to_fit(&soft_reserve_resource, res);
+		} else {
+			/* publish the rest immediately */
+			insert_resource_expand_to_fit(&iomem_resource, res);
+		}
 	}
 
 	/*
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index e4a07fb4f5b2..77ac940e3013 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -856,7 +856,7 @@ static int add_cxl_resources(struct resource *cxl_res)
 		 */
 		cxl_set_public_resource(res, new);
 
-		__insert_resource_expand_to_fit(&iomem_resource, new);
+		insert_resource_expand_to_fit(&iomem_resource, new);
 
 		next = res->sibling;
 		while (next && resource_overlaps(new, next)) {
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index 22732b729017..56e3cbd181b5 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -83,8 +83,7 @@ static __init int hmem_register_one(struct resource *res, void *data)
 
 static __init int hmem_init(void)
 {
-	walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
-				   -1, NULL, hmem_register_one);
+	walk_soft_reserve_res(0, -1, NULL, hmem_register_one);
 	return 0;
 }
 
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 48f4642f4bb8..1cf7c2a0ee1c 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -73,9 +73,7 @@ static int hmem_register_device(struct device *host, int target_nid,
 		return 0;
 	}
 
-	rc = region_intersects_soft_reserve(res->start, resource_size(res),
-					    IORESOURCE_MEM,
-					    IORES_DESC_SOFT_RESERVED);
+	rc = region_intersects_soft_reserve(res->start, resource_size(res));
 	if (rc != REGION_INTERSECTS)
 		return 0;
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 7287919b2380..7f1c252212d0 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -48,6 +48,14 @@ struct resource iomem_resource = {
 };
 EXPORT_SYMBOL(iomem_resource);
 
+struct resource soft_reserve_resource = {
+	.name	= "Soft Reserved",
+	.start	= 0,
+	.end	= -1,
+	.desc	= IORES_DESC_SOFT_RESERVED,
+	.flags	= IORESOURCE_MEM,
+};
+
 static DEFINE_RWLOCK(resource_lock);
 
 /*
@@ -451,24 +459,17 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
 }
 EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
 
-#ifdef CONFIG_EFI_SOFT_RESERVE
-static struct resource soft_reserve_resource = {
-	.name	= "Soft Reserved",
-	.start	= 0,
-	.end	= -1,
-	.desc	= IORES_DESC_SOFT_RESERVED,
-	.flags	= IORESOURCE_MEM,
-};
-
-int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
-			       u64 start, u64 end, void *arg,
-			       int (*func)(struct resource *, void *))
+/*
+ * In support of device drivers claiming Soft Reserved resources, walk the Soft
+ * Reserved resource deferral tree.
+ */
+int walk_soft_reserve_res(u64 start, u64 end, void *arg,
+			  int (*func)(struct resource *, void *))
 {
-	return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
-			     arg, func);
+	return walk_res_desc(&soft_reserve_resource, start, end, IORESOURCE_MEM,
+			     IORES_DESC_SOFT_RESERVED, arg, func);
 }
-EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
-#endif
+EXPORT_SYMBOL_GPL(walk_soft_reserve_res);
 
 /*
  * This function calls the @func callback against all memory ranges of type
@@ -692,21 +693,22 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
 }
 EXPORT_SYMBOL_GPL(region_intersects);
 
-#ifdef CONFIG_EFI_SOFT_RESERVE
-int region_intersects_soft_reserve(resource_size_t start, size_t size,
-				   unsigned long flags, unsigned long desc)
+/*
+ * Check if the provided range is registered in the Soft Reserved resource
+ * deferral tree for driver consideration.
+ */
+int region_intersects_soft_reserve(resource_size_t start, size_t size)
 {
 	int ret;
 
 	read_lock(&resource_lock);
-	ret = __region_intersects(&soft_reserve_resource, start, size, flags,
-				  desc);
+	ret = __region_intersects(&soft_reserve_resource, start, size,
+				  IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED);
 	read_unlock(&resource_lock);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(region_intersects_soft_reserve);
-#endif
 
 void __weak arch_remove_reservations(struct resource *avail)
 {
@@ -1026,7 +1028,7 @@ EXPORT_SYMBOL_GPL(insert_resource);
  * Insert a resource into the resource tree, possibly expanding it in order
  * to make it encompass any conflicting resources.
  */
-void __insert_resource_expand_to_fit(struct resource *root, struct resource *new)
+void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
 {
 	if (new->parent)
 		return;
@@ -1057,19 +1059,7 @@ void __insert_resource_expand_to_fit(struct resource *root, struct resource *new
  * to use this interface. The former are built-in and only the latter,
  * CXL, is a module.
  */
-EXPORT_SYMBOL_NS_GPL(__insert_resource_expand_to_fit, "CXL");
-
-void insert_resource_expand_to_fit(struct resource *new)
-{
-	struct resource *root = &iomem_resource;
-
-#ifdef CONFIG_EFI_SOFT_RESERVE
-	if (new->desc == IORES_DESC_SOFT_RESERVED)
-		root = &soft_reserve_resource;
-#endif
-
-	__insert_resource_expand_to_fit(root, new);
-}
+EXPORT_SYMBOL_NS_GPL(insert_resource_expand_to_fit, "CXL");
 
 /**
  * remove_resource - Remove a resource in the resource tree
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by dan.j.williams@intel.com 3 weeks, 6 days ago
dan.j.williams@ wrote:
[..]
> I am uncomfortable with insert_resource_expand_to_fit() having
> build-conditional semantics. However, I agree with you that my initial
> wrapping attempt with insert_resource_late() was not much better.
> 
> When someone revisits this code in 10 years I doubt they understand what
> this redirect is doing. So, make the deferral explicit, open-coded, and
> with some commentary in the only function where the distinction matters,
> e820__reserve_resources_late().
> 
> Proposed incremental on top of v4 rebased to v6.19-rc1 that also
> unconditionally defines 'soft_reserve_resource', drops some redundant
> arguments to helper functions, and sprinkles more commentary about this
> mechanism.
> 
> I will go ahead and pull this into the for-7.0/cxl-init topic branch.
> Holler if any of this looks broken.

Boris et al, I have not heard any hollering. I am proceeding shortly
with the proposal below for Smita to build upon for her series.

-- 8< --
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 61bf47553f0e..95662b2fb458 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -232,6 +232,7 @@ struct resource_constraint {
 /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
 extern struct resource ioport_resource;
 extern struct resource iomem_resource;
+extern struct resource soft_reserve_resource;
 
 extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
 extern int request_resource(struct resource *root, struct resource *new);
@@ -242,8 +243,7 @@ extern void reserve_region_with_split(struct resource *root,
 			     const char *name);
 extern struct resource *insert_resource_conflict(struct resource *parent, struct resource *new);
 extern int insert_resource(struct resource *parent, struct resource *new);
-extern void __insert_resource_expand_to_fit(struct resource *root, struct resource *new);
-extern void insert_resource_expand_to_fit(struct resource *new);
+extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new);
 extern int remove_resource(struct resource *old);
 extern void arch_remove_reservations(struct resource *avail);
 extern int allocate_resource(struct resource *root, struct resource *new,
@@ -419,13 +419,10 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
 extern int
 walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
 		    void *arg, int (*func)(struct resource *, void *));
+extern int walk_soft_reserve_res(u64 start, u64 end, void *arg,
+				 int (*func)(struct resource *, void *));
 extern int
-walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
-			   u64 start, u64 end, void *arg,
-			   int (*func)(struct resource *, void *));
-extern int
-region_intersects_soft_reserve(resource_size_t start, size_t size,
-			       unsigned long flags, unsigned long desc);
+region_intersects_soft_reserve(resource_size_t start, size_t size);
 
 struct resource *devm_request_free_mem_region(struct device *dev,
 		struct resource *base, unsigned long size);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 0df7d9bacf82..69c050f50e18 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -1151,11 +1151,18 @@ void __init e820__reserve_resources_late(void)
 	int i;
 	struct resource *res;
 
-	res = e820_res;
-	for (i = 0; i < e820_table->nr_entries; i++) {
-		if (!res->parent && res->end)
-			insert_resource_expand_to_fit(res);
-		res++;
+	for (i = 0, res = e820_res; i < e820_table->nr_entries; i++, res++) {
+		/* skip added or uninitialized resources */
+		if (res->parent || !res->end)
+			continue;
+
+		/* set aside soft-reserved resources for driver consideration */
+		if (res->desc == IORES_DESC_SOFT_RESERVED) {
+			insert_resource_expand_to_fit(&soft_reserve_resource, res);
+		} else {
+			/* publish the rest immediately */
+			insert_resource_expand_to_fit(&iomem_resource, res);
+		}
 	}
 
 	/*
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
index e4a07fb4f5b2..77ac940e3013 100644
--- a/drivers/cxl/acpi.c
+++ b/drivers/cxl/acpi.c
@@ -856,7 +856,7 @@ static int add_cxl_resources(struct resource *cxl_res)
 		 */
 		cxl_set_public_resource(res, new);
 
-		__insert_resource_expand_to_fit(&iomem_resource, new);
+		insert_resource_expand_to_fit(&iomem_resource, new);
 
 		next = res->sibling;
 		while (next && resource_overlaps(new, next)) {
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index 22732b729017..56e3cbd181b5 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -83,8 +83,7 @@ static __init int hmem_register_one(struct resource *res, void *data)
 
 static __init int hmem_init(void)
 {
-	walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
-				   -1, NULL, hmem_register_one);
+	walk_soft_reserve_res(0, -1, NULL, hmem_register_one);
 	return 0;
 }
 
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 48f4642f4bb8..1cf7c2a0ee1c 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -73,9 +73,7 @@ static int hmem_register_device(struct device *host, int target_nid,
 		return 0;
 	}
 
-	rc = region_intersects_soft_reserve(res->start, resource_size(res),
-					    IORESOURCE_MEM,
-					    IORES_DESC_SOFT_RESERVED);
+	rc = region_intersects_soft_reserve(res->start, resource_size(res));
 	if (rc != REGION_INTERSECTS)
 		return 0;
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 7287919b2380..7f1c252212d0 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -48,6 +48,14 @@ struct resource iomem_resource = {
 };
 EXPORT_SYMBOL(iomem_resource);
 
+struct resource soft_reserve_resource = {
+	.name	= "Soft Reserved",
+	.start	= 0,
+	.end	= -1,
+	.desc	= IORES_DESC_SOFT_RESERVED,
+	.flags	= IORESOURCE_MEM,
+};
+
 static DEFINE_RWLOCK(resource_lock);
 
 /*
@@ -451,24 +459,17 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
 }
 EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
 
-#ifdef CONFIG_EFI_SOFT_RESERVE
-static struct resource soft_reserve_resource = {
-	.name	= "Soft Reserved",
-	.start	= 0,
-	.end	= -1,
-	.desc	= IORES_DESC_SOFT_RESERVED,
-	.flags	= IORESOURCE_MEM,
-};
-
-int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
-			       u64 start, u64 end, void *arg,
-			       int (*func)(struct resource *, void *))
+/*
+ * In support of device drivers claiming Soft Reserved resources, walk the Soft
+ * Reserved resource deferral tree.
+ */
+int walk_soft_reserve_res(u64 start, u64 end, void *arg,
+			  int (*func)(struct resource *, void *))
 {
-	return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
-			     arg, func);
+	return walk_res_desc(&soft_reserve_resource, start, end, IORESOURCE_MEM,
+			     IORES_DESC_SOFT_RESERVED, arg, func);
 }
-EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
-#endif
+EXPORT_SYMBOL_GPL(walk_soft_reserve_res);
 
 /*
  * This function calls the @func callback against all memory ranges of type
@@ -692,21 +693,22 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
 }
 EXPORT_SYMBOL_GPL(region_intersects);
 
-#ifdef CONFIG_EFI_SOFT_RESERVE
-int region_intersects_soft_reserve(resource_size_t start, size_t size,
-				   unsigned long flags, unsigned long desc)
+/*
+ * Check if the provided range is registered in the Soft Reserved resource
+ * deferral tree for driver consideration.
+ */
+int region_intersects_soft_reserve(resource_size_t start, size_t size)
 {
 	int ret;
 
 	read_lock(&resource_lock);
-	ret = __region_intersects(&soft_reserve_resource, start, size, flags,
-				  desc);
+	ret = __region_intersects(&soft_reserve_resource, start, size,
+				  IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED);
 	read_unlock(&resource_lock);
 
 	return ret;
 }
 EXPORT_SYMBOL_GPL(region_intersects_soft_reserve);
-#endif
 
 void __weak arch_remove_reservations(struct resource *avail)
 {
@@ -1026,7 +1028,7 @@ EXPORT_SYMBOL_GPL(insert_resource);
  * Insert a resource into the resource tree, possibly expanding it in order
  * to make it encompass any conflicting resources.
  */
-void __insert_resource_expand_to_fit(struct resource *root, struct resource *new)
+void insert_resource_expand_to_fit(struct resource *root, struct resource *new)
 {
 	if (new->parent)
 		return;
@@ -1057,19 +1059,7 @@ void __insert_resource_expand_to_fit(struct resource *root, struct resource *new
  * to use this interface. The former are built-in and only the latter,
  * CXL, is a module.
  */
-EXPORT_SYMBOL_NS_GPL(__insert_resource_expand_to_fit, "CXL");
-
-void insert_resource_expand_to_fit(struct resource *new)
-{
-	struct resource *root = &iomem_resource;
-
-#ifdef CONFIG_EFI_SOFT_RESERVE
-	if (new->desc == IORES_DESC_SOFT_RESERVED)
-		root = &soft_reserve_resource;
-#endif
-
-	__insert_resource_expand_to_fit(root, new);
-}
+EXPORT_SYMBOL_NS_GPL(insert_resource_expand_to_fit, "CXL");
 
 /**
  * remove_resource - Remove a resource in the resource tree
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Koralahalli Channabasappa, Smita 4 months, 1 week ago
Hi Boris,

On 9/9/2025 9:12 AM, Borislav Petkov wrote:
> On Fri, Aug 22, 2025 at 03:41:57AM +0000, Smita Koralahalli wrote:
>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
>> index c3acbd26408b..aef1ff2cabda 100644
>> --- a/arch/x86/kernel/e820.c
>> +++ b/arch/x86/kernel/e820.c
>> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
>>   	res = e820_res;
>>   	for (i = 0; i < e820_table->nr_entries; i++) {
>>   		if (!res->parent && res->end)
>> -			insert_resource_expand_to_fit(&iomem_resource, res);
>> +			insert_resource_late(res);
>>   		res++;
>>   	}
>>
> 
> Btw, this doesn't even build and cover letter doesn't say what it applies
> ontop so I applied it on my pile of tip/master.
> 
> kernel/resource.c: In function ‘region_intersects_soft_reserve’:
> kernel/resource.c:694:36: error: ‘soft_reserve_resource’ undeclared (first use in this function); did you mean ‘devm_release_resource’?
>    694 |         ret = __region_intersects(&soft_reserve_resource, start, size, flags,
>        |                                    ^~~~~~~~~~~~~~~~~~~~~
>        |                                    devm_release_resource
> kernel/resource.c:694:36: note: each undeclared identifier is reported only once for each function it appears in
> make[3]: *** [scripts/Makefile.build:287: kernel/resource.o] Error 1
> make[2]: *** [scripts/Makefile.build:556: kernel] Error 2
> make[2]: *** Waiting for unfinished jobs....
> make[1]: *** [/mnt/kernel/kernel/2nd/linux/Makefile:2011: .] Error 2
> make: *** [Makefile:248: __sub-make] Error 2

Apologies for the delay.

This was based on mainline. I have rebased the series onto the latest 
mainline and sent out a new revision and noted it in the cover letter.

https://lore.kernel.org/all/20250930044757.214798-2-Smita.KoralahalliChannabasappa@amd.com/

> 
> Also, I'd do this resource insertion a bit differently:
> 
> insert_resource_expand_to_fit(struct resource *new)
> {
> 	struct resource *root = &iomem_resource;
> 
> 	if (new->desc == IORES_DESC_SOFT_RESERVED)
> 		root = &soft_reserve_resource;
> 
> 	return __insert_resource_expand_to_fit(root, new);
> }
> 
> and rename the current insert_resource_expand_to_fit() to the __ variant.

I have made these changes as well.

Thanks
Smita

> 
> It looks like you want to intercept all callers of
> insert_resource_expand_to_fit() instead of defining a separate set which works
> on the soft-reserve thing.
> 
> Oh well, the resource code is yucky already.
> 

Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by dan.j.williams@intel.com 5 months ago
[ add Boris and Ard ]

Ard, Boris, can you have a look at the touches to early e820/x86 init
(insert_resource_late()) and give an ack (or nak). The general problem
here is conflicts between e820 memory resources and CXL subsystem memory
resources.

Smita Koralahalli wrote:
> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
> instead of the iomem_resource tree at boot.
> 
> Publishing Soft Reserved ranges into iomem too early causes conflicts with
> CXL hotplug and region assembly failure, especially when Soft Reserved
> overlaps CXL regions.
> 
> Re-inserting these ranges into iomem will be handled in follow-up patches,
> after ensuring CXL window publication ordering is stabilized and when the
> dax_hmem is ready to consume them.
> 
> This avoids trimming or deleting resources later and provides a cleaner
> handoff between EFI-defined memory and CXL resource management.
> 
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>


> Signed-off-by: Dan Williams <dan.j.williams@intel.com>

Smita, if you added changes this should have Co-developed-by. Otherwise
plain Signed-off-by is interpreted as only chain of custody.  Any other
patches that you add my Signed-off-by to should also have
Co-developed-by or be From: me.

Alternatively if you completely rewritel a patch with your own approach
then note the source (with a Link:) and leave off the original SOB.

Lastly, in this case it looks unmodified from what I wrote? Then it
should be:

From: Dan Williams <dan.j.williams@intel.com>

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>

...to show the chain of custody of you forwarding a diff authored
completely by someone else.
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Koralahalli Channabasappa, Smita 5 months ago
On 9/8/2025 4:01 PM, dan.j.williams@intel.com wrote:
> [ add Boris and Ard ]
> 
> Ard, Boris, can you have a look at the touches to early e820/x86 init
> (insert_resource_late()) and give an ack (or nak). The general problem
> here is conflicts between e820 memory resources and CXL subsystem memory
> resources.
> 
> Smita Koralahalli wrote:
>> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
>> instead of the iomem_resource tree at boot.
>>
>> Publishing Soft Reserved ranges into iomem too early causes conflicts with
>> CXL hotplug and region assembly failure, especially when Soft Reserved
>> overlaps CXL regions.
>>
>> Re-inserting these ranges into iomem will be handled in follow-up patches,
>> after ensuring CXL window publication ordering is stabilized and when the
>> dax_hmem is ready to consume them.
>>
>> This avoids trimming or deleting resources later and provides a cleaner
>> handoff between EFI-defined memory and CXL resource management.
>>
>> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> 
> 
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> 
> Smita, if you added changes this should have Co-developed-by. Otherwise
> plain Signed-off-by is interpreted as only chain of custody.  Any other
> patches that you add my Signed-off-by to should also have
> Co-developed-by or be From: me.
> 
> Alternatively if you completely rewritel a patch with your own approach
> then note the source (with a Link:) and leave off the original SOB.
> 
> Lastly, in this case it looks unmodified from what I wrote? Then it
> should be:
> 
> From: Dan Williams <dan.j.williams@intel.com>
> 
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> 
> ...to show the chain of custody of you forwarding a diff authored
> completely by someone else.

Thanks for clarifying, Dan. I wasn’t aware of the distinction before 
(especially to handle the chain of custody..). I will update to reflect 
that properly and will also be careful with how I handle authorship and 
sign-offs in future submissions.

Thanks
Smita
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by dan.j.williams@intel.com 4 months, 2 weeks ago
Koralahalli Channabasappa, Smita wrote:
> On 9/8/2025 4:01 PM, dan.j.williams@intel.com wrote:
> > [ add Boris and Ard ]
> > 
> > Ard, Boris, can you have a look at the touches to early e820/x86 init
> > (insert_resource_late()) and give an ack (or nak). The general problem
> > here is conflicts between e820 memory resources and CXL subsystem memory
> > resources.
> > 
> > Smita Koralahalli wrote:
> >> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
> >> instead of the iomem_resource tree at boot.
> >>
> >> Publishing Soft Reserved ranges into iomem too early causes conflicts with
> >> CXL hotplug and region assembly failure, especially when Soft Reserved
> >> overlaps CXL regions.
> >>
> >> Re-inserting these ranges into iomem will be handled in follow-up patches,
> >> after ensuring CXL window publication ordering is stabilized and when the
> >> dax_hmem is ready to consume them.
> >>
> >> This avoids trimming or deleting resources later and provides a cleaner
> >> handoff between EFI-defined memory and CXL resource management.
> >>
> >> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> > 
> > 
> >> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > 
> > Smita, if you added changes this should have Co-developed-by. Otherwise
> > plain Signed-off-by is interpreted as only chain of custody.  Any other
> > patches that you add my Signed-off-by to should also have
> > Co-developed-by or be From: me.
> > 
> > Alternatively if you completely rewritel a patch with your own approach
> > then note the source (with a Link:) and leave off the original SOB.
> > 
> > Lastly, in this case it looks unmodified from what I wrote? Then it
> > should be:
> > 
> > From: Dan Williams <dan.j.williams@intel.com>
> > 
> > Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> > Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> > 
> > ...to show the chain of custody of you forwarding a diff authored
> > completely by someone else.
> 
> Thanks for clarifying, Dan. I wasn’t aware of the distinction before 
> (especially to handle the chain of custody..). I will update to reflect 
> that properly and will also be careful with how I handle authorship and 
> sign-offs in future submissions.

Yeah, the choice to replace From: is subjective and I usually reserve
that for significant rewrites. If any of the original patch remains then
add Co-developed-by + Signed-off-by. If none of the original patch
remains I do still like to say:

"based on an original patch by ..." with a Link: to that inspiration
patch.

...and if you just forward the original, keep From: untouched and just
add your own Signed-off-by as documented in
Documentation/process/submitting-patches.rst.
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Zhijian Li (Fujitsu) 5 months, 1 week ago

On 22/08/2025 11:41, Smita Koralahalli wrote:
> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
> instead of the iomem_resource tree at boot.
> 
> Publishing Soft Reserved ranges into iomem too early causes conflicts with
> CXL hotplug and region assembly failure, especially when Soft Reserved
> overlaps CXL regions.
> 
> Re-inserting these ranges into iomem will be handled in follow-up patches,
> after ensuring CXL window publication ordering is stabilized and when the
> dax_hmem is ready to consume them.
> 
> This avoids trimming or deleting resources later and provides a cleaner
> handoff between EFI-defined memory and CXL resource management.
> 
> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
> ---
>   arch/x86/kernel/e820.c    |  2 +-
>   drivers/dax/hmem/device.c |  4 +--
>   drivers/dax/hmem/hmem.c   |  8 +++++
>   include/linux/ioport.h    | 24 +++++++++++++
>   kernel/resource.c         | 73 +++++++++++++++++++++++++++++++++------
>   5 files changed, 97 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
> index c3acbd26408b..aef1ff2cabda 100644
> --- a/arch/x86/kernel/e820.c
> +++ b/arch/x86/kernel/e820.c
> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
>   	res = e820_res;
>   	for (i = 0; i < e820_table->nr_entries; i++) {
>   		if (!res->parent && res->end)
> -			insert_resource_expand_to_fit(&iomem_resource, res);
> +			insert_resource_late(res);
>   		res++;
>   	}
>   
> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
> index f9e1a76a04a9..22732b729017 100644
> --- a/drivers/dax/hmem/device.c
> +++ b/drivers/dax/hmem/device.c
> @@ -83,8 +83,8 @@ static __init int hmem_register_one(struct resource *res, void *data)
>   
>   static __init int hmem_init(void)
>   {
> -	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
> -			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
> +	walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
> +				   -1, NULL, hmem_register_one);
>   	return 0;
>   }
>   
> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
> index c18451a37e4f..d5b8f06d531e 100644
> --- a/drivers/dax/hmem/hmem.c
> +++ b/drivers/dax/hmem/hmem.c
> @@ -73,10 +73,18 @@ static int hmem_register_device(struct device *host, int target_nid,
>   		return 0;
>   	}
>   
> +#ifdef CONFIG_EFI_SOFT_RESERVE


Note that dax_kmem currently depends on CONFIG_EFI_SOFT_RESERVED, so this conditional check may be redundant.



> +	rc = region_intersects_soft_reserve(res->start, resource_size(res),
> +					    IORESOURCE_MEM,
> +					    IORES_DESC_SOFT_RESERVED);
> +	if (rc != REGION_INTERSECTS)
> +		return 0;
> +#else
>   	rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
>   			       IORES_DESC_SOFT_RESERVED);
>   	if (rc != REGION_INTERSECTS)
>   		return 0;
> +#endif
>   

Additionally, please add a TODO note here (e.g., "Add soft-reserved memory back to iomem").


>   	id = memregion_alloc(GFP_KERNEL);
>   	if (id < 0) {
> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
> index e8b2d6aa4013..889bc4982777 100644
> --- a/include/linux/ioport.h
> +++ b/include/linux/ioport.h
> @@ -232,6 +232,9 @@ struct resource_constraint {
>   /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
>   extern struct resource ioport_resource;
>   extern struct resource iomem_resource;
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +extern struct resource soft_reserve_resource;
> +#endif
>   
>   extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
>   extern int request_resource(struct resource *root, struct resource *new);
> @@ -255,6 +258,22 @@ int adjust_resource(struct resource *res, resource_size_t start,
>   		    resource_size_t size);
>   resource_size_t resource_alignment(struct resource *res);
>   
> +
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +static inline void insert_resource_late(struct resource *new)
> +{
> +	if (new->desc == IORES_DESC_SOFT_RESERVED)
> +		insert_resource_expand_to_fit(&soft_reserve_resource, new);
> +	else
> +		insert_resource_expand_to_fit(&iomem_resource, new);
> +}
> +#else
> +static inline void insert_resource_late(struct resource *new)
> +{
> +	insert_resource_expand_to_fit(&iomem_resource, new);
> +}
> +#endif
> +
>   /**
>    * resource_set_size - Calculate resource end address from size and start
>    * @res: Resource descriptor
> @@ -409,6 +428,11 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
>   extern int
>   walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
>   		    void *arg, int (*func)(struct resource *, void *));
> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
> +			       u64 start, u64 end, void *arg,
> +			       int (*func)(struct resource *, void *));
> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
> +				   unsigned long flags, unsigned long desc);
>   
>   struct resource *devm_request_free_mem_region(struct device *dev,
>   		struct resource *base, unsigned long size);
> diff --git a/kernel/resource.c b/kernel/resource.c
> index f9bb5481501a..8479a99441e2 100644
> --- a/kernel/resource.c
> +++ b/kernel/resource.c
> @@ -321,13 +321,14 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
>   }
>   
>   /**
> - * find_next_iomem_res - Finds the lowest iomem resource that covers part of
> - *			 [@start..@end].
> + * find_next_res - Finds the lowest resource that covers part of
> + *		   [@start..@end].
>    *
>    * If a resource is found, returns 0 and @*res is overwritten with the part
>    * of the resource that's within [@start..@end]; if none is found, returns
>    * -ENODEV.  Returns -EINVAL for invalid parameters.
>    *
> + * @parent:	resource tree root to search
>    * @start:	start address of the resource searched for
>    * @end:	end address of same resource
>    * @flags:	flags which the resource must have
> @@ -337,9 +338,9 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
>    * The caller must specify @start, @end, @flags, and @desc
>    * (which may be IORES_DESC_NONE).
>    */
> -static int find_next_iomem_res(resource_size_t start, resource_size_t end,
> -			       unsigned long flags, unsigned long desc,
> -			       struct resource *res)
> +static int find_next_res(struct resource *parent, resource_size_t start,
> +			 resource_size_t end, unsigned long flags,
> +			 unsigned long desc, struct resource *res)
>   {
>   	struct resource *p;
>   
> @@ -351,7 +352,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>   
>   	read_lock(&resource_lock);
>   
> -	for_each_resource(&iomem_resource, p, false) {
> +	for_each_resource(parent, p, false) {
>   		/* If we passed the resource we are looking for, stop */
>   		if (p->start > end) {
>   			p = NULL;
> @@ -382,16 +383,23 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>   	return p ? 0 : -ENODEV;
>   }
>   
> -static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
> -				 unsigned long flags, unsigned long desc,
> -				 void *arg,
> -				 int (*func)(struct resource *, void *))
> +static int find_next_iomem_res(resource_size_t start, resource_size_t end,
> +			       unsigned long flags, unsigned long desc,
> +			       struct resource *res)
> +{
> +	return find_next_res(&iomem_resource, start, end, flags, desc, res);
> +}
> +
> +static int walk_res_desc(struct resource *parent, resource_size_t start,
> +			 resource_size_t end, unsigned long flags,
> +			 unsigned long desc, void *arg,
> +			 int (*func)(struct resource *, void *))
>   {
>   	struct resource res;
>   	int ret = -EINVAL;
>   
>   	while (start < end &&
> -	       !find_next_iomem_res(start, end, flags, desc, &res)) {
> +	       !find_next_res(parent, start, end, flags, desc, &res)) {
>   		ret = (*func)(&res, arg);
>   		if (ret)
>   			break;
> @@ -402,6 +410,15 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
>   	return ret;
>   }
>   
> +static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
> +				 unsigned long flags, unsigned long desc,
> +				 void *arg,
> +				 int (*func)(struct resource *, void *))
> +{
> +	return walk_res_desc(&iomem_resource, start, end, flags, desc, arg, func);
> +}
> +
> +
>   /**
>    * walk_iomem_res_desc - Walks through iomem resources and calls func()
>    *			 with matching resource ranges.
> @@ -426,6 +443,26 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
>   }
>   EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
>   
> +#ifdef CONFIG_EFI_SOFT_RESERVE
> +struct resource soft_reserve_resource = {
> +	.name	= "Soft Reserved",
> +	.start	= 0,
> +	.end	= -1,
> +	.desc	= IORES_DESC_SOFT_RESERVED,
> +	.flags	= IORESOURCE_MEM,
> +};
> +EXPORT_SYMBOL_GPL(soft_reserve_resource);
> +
> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
> +			       u64 start, u64 end, void *arg,
> +			       int (*func)(struct resource *, void *))
> +{
> +	return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
> +			     arg, func);
> +}
> +EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
> +#endif
> +
>   /*
>    * This function calls the @func callback against all memory ranges of type
>    * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
> @@ -648,6 +685,20 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
>   }
>   EXPORT_SYMBOL_GPL(region_intersects);
>   
> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
> +				   unsigned long flags, unsigned long desc)


Shouldn't this function be implemented uder `#if CONFIG_EFI_SOFT_RESERVE`? Otherwise it may cause compilation failures when the config is disabled.


Thanks
Zhijian
Re: [PATCH 1/6] dax/hmem, e820, resource: Defer Soft Reserved registration until hmem is ready
Posted by Koralahalli Channabasappa, Smita 4 months, 1 week ago
Hi Zhijian,

Sorry for the delay here.

On 8/31/2025 7:59 PM, Zhijian Li (Fujitsu) wrote:
> 
> 
> On 22/08/2025 11:41, Smita Koralahalli wrote:
>> Insert Soft Reserved memory into a dedicated soft_reserve_resource tree
>> instead of the iomem_resource tree at boot.
>>
>> Publishing Soft Reserved ranges into iomem too early causes conflicts with
>> CXL hotplug and region assembly failure, especially when Soft Reserved
>> overlaps CXL regions.
>>
>> Re-inserting these ranges into iomem will be handled in follow-up patches,
>> after ensuring CXL window publication ordering is stabilized and when the
>> dax_hmem is ready to consume them.
>>
>> This avoids trimming or deleting resources later and provides a cleaner
>> handoff between EFI-defined memory and CXL resource management.
>>
>> Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@amd.com>
>> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
>> ---
>>    arch/x86/kernel/e820.c    |  2 +-
>>    drivers/dax/hmem/device.c |  4 +--
>>    drivers/dax/hmem/hmem.c   |  8 +++++
>>    include/linux/ioport.h    | 24 +++++++++++++
>>    kernel/resource.c         | 73 +++++++++++++++++++++++++++++++++------
>>    5 files changed, 97 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
>> index c3acbd26408b..aef1ff2cabda 100644
>> --- a/arch/x86/kernel/e820.c
>> +++ b/arch/x86/kernel/e820.c
>> @@ -1153,7 +1153,7 @@ void __init e820__reserve_resources_late(void)
>>    	res = e820_res;
>>    	for (i = 0; i < e820_table->nr_entries; i++) {
>>    		if (!res->parent && res->end)
>> -			insert_resource_expand_to_fit(&iomem_resource, res);
>> +			insert_resource_late(res);
>>    		res++;
>>    	}
>>    
>> diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
>> index f9e1a76a04a9..22732b729017 100644
>> --- a/drivers/dax/hmem/device.c
>> +++ b/drivers/dax/hmem/device.c
>> @@ -83,8 +83,8 @@ static __init int hmem_register_one(struct resource *res, void *data)
>>    
>>    static __init int hmem_init(void)
>>    {
>> -	walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
>> -			IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
>> +	walk_soft_reserve_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM, 0,
>> +				   -1, NULL, hmem_register_one);
>>    	return 0;
>>    }
>>    
>> diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
>> index c18451a37e4f..d5b8f06d531e 100644
>> --- a/drivers/dax/hmem/hmem.c
>> +++ b/drivers/dax/hmem/hmem.c
>> @@ -73,10 +73,18 @@ static int hmem_register_device(struct device *host, int target_nid,
>>    		return 0;
>>    	}
>>    
>> +#ifdef CONFIG_EFI_SOFT_RESERVE
> 
> 
> Note that dax_kmem currently depends on CONFIG_EFI_SOFT_RESERVED, so this conditional check may be redundant.

Removed in v2.

> 
> 
> 
>> +	rc = region_intersects_soft_reserve(res->start, resource_size(res),
>> +					    IORESOURCE_MEM,
>> +					    IORES_DESC_SOFT_RESERVED);
>> +	if (rc != REGION_INTERSECTS)
>> +		return 0;
>> +#else
>>    	rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM,
>>    			       IORES_DESC_SOFT_RESERVED);
>>    	if (rc != REGION_INTERSECTS)
>>    		return 0;
>> +#endif
>>    
> 
> Additionally, please add a TODO note here (e.g., "Add soft-reserved memory back to iomem").

Added.

> 
> 
>>    	id = memregion_alloc(GFP_KERNEL);
>>    	if (id < 0) {
>> diff --git a/include/linux/ioport.h b/include/linux/ioport.h
>> index e8b2d6aa4013..889bc4982777 100644
>> --- a/include/linux/ioport.h
>> +++ b/include/linux/ioport.h
>> @@ -232,6 +232,9 @@ struct resource_constraint {
>>    /* PC/ISA/whatever - the normal PC address spaces: IO and memory */
>>    extern struct resource ioport_resource;
>>    extern struct resource iomem_resource;
>> +#ifdef CONFIG_EFI_SOFT_RESERVE
>> +extern struct resource soft_reserve_resource;
>> +#endif
>>    
>>    extern struct resource *request_resource_conflict(struct resource *root, struct resource *new);
>>    extern int request_resource(struct resource *root, struct resource *new);
>> @@ -255,6 +258,22 @@ int adjust_resource(struct resource *res, resource_size_t start,
>>    		    resource_size_t size);
>>    resource_size_t resource_alignment(struct resource *res);
>>    
>> +
>> +#ifdef CONFIG_EFI_SOFT_RESERVE
>> +static inline void insert_resource_late(struct resource *new)
>> +{
>> +	if (new->desc == IORES_DESC_SOFT_RESERVED)
>> +		insert_resource_expand_to_fit(&soft_reserve_resource, new);
>> +	else
>> +		insert_resource_expand_to_fit(&iomem_resource, new);
>> +}
>> +#else
>> +static inline void insert_resource_late(struct resource *new)
>> +{
>> +	insert_resource_expand_to_fit(&iomem_resource, new);
>> +}
>> +#endif
>> +
>>    /**
>>     * resource_set_size - Calculate resource end address from size and start
>>     * @res: Resource descriptor
>> @@ -409,6 +428,11 @@ walk_system_ram_res_rev(u64 start, u64 end, void *arg,
>>    extern int
>>    walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end,
>>    		    void *arg, int (*func)(struct resource *, void *));
>> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
>> +			       u64 start, u64 end, void *arg,
>> +			       int (*func)(struct resource *, void *));
>> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
>> +				   unsigned long flags, unsigned long desc);
>>    
>>    struct resource *devm_request_free_mem_region(struct device *dev,
>>    		struct resource *base, unsigned long size);
>> diff --git a/kernel/resource.c b/kernel/resource.c
>> index f9bb5481501a..8479a99441e2 100644
>> --- a/kernel/resource.c
>> +++ b/kernel/resource.c
>> @@ -321,13 +321,14 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
>>    }
>>    
>>    /**
>> - * find_next_iomem_res - Finds the lowest iomem resource that covers part of
>> - *			 [@start..@end].
>> + * find_next_res - Finds the lowest resource that covers part of
>> + *		   [@start..@end].
>>     *
>>     * If a resource is found, returns 0 and @*res is overwritten with the part
>>     * of the resource that's within [@start..@end]; if none is found, returns
>>     * -ENODEV.  Returns -EINVAL for invalid parameters.
>>     *
>> + * @parent:	resource tree root to search
>>     * @start:	start address of the resource searched for
>>     * @end:	end address of same resource
>>     * @flags:	flags which the resource must have
>> @@ -337,9 +338,9 @@ static bool is_type_match(struct resource *p, unsigned long flags, unsigned long
>>     * The caller must specify @start, @end, @flags, and @desc
>>     * (which may be IORES_DESC_NONE).
>>     */
>> -static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>> -			       unsigned long flags, unsigned long desc,
>> -			       struct resource *res)
>> +static int find_next_res(struct resource *parent, resource_size_t start,
>> +			 resource_size_t end, unsigned long flags,
>> +			 unsigned long desc, struct resource *res)
>>    {
>>    	struct resource *p;
>>    
>> @@ -351,7 +352,7 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>>    
>>    	read_lock(&resource_lock);
>>    
>> -	for_each_resource(&iomem_resource, p, false) {
>> +	for_each_resource(parent, p, false) {
>>    		/* If we passed the resource we are looking for, stop */
>>    		if (p->start > end) {
>>    			p = NULL;
>> @@ -382,16 +383,23 @@ static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>>    	return p ? 0 : -ENODEV;
>>    }
>>    
>> -static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
>> -				 unsigned long flags, unsigned long desc,
>> -				 void *arg,
>> -				 int (*func)(struct resource *, void *))
>> +static int find_next_iomem_res(resource_size_t start, resource_size_t end,
>> +			       unsigned long flags, unsigned long desc,
>> +			       struct resource *res)
>> +{
>> +	return find_next_res(&iomem_resource, start, end, flags, desc, res);
>> +}
>> +
>> +static int walk_res_desc(struct resource *parent, resource_size_t start,
>> +			 resource_size_t end, unsigned long flags,
>> +			 unsigned long desc, void *arg,
>> +			 int (*func)(struct resource *, void *))
>>    {
>>    	struct resource res;
>>    	int ret = -EINVAL;
>>    
>>    	while (start < end &&
>> -	       !find_next_iomem_res(start, end, flags, desc, &res)) {
>> +	       !find_next_res(parent, start, end, flags, desc, &res)) {
>>    		ret = (*func)(&res, arg);
>>    		if (ret)
>>    			break;
>> @@ -402,6 +410,15 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
>>    	return ret;
>>    }
>>    
>> +static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
>> +				 unsigned long flags, unsigned long desc,
>> +				 void *arg,
>> +				 int (*func)(struct resource *, void *))
>> +{
>> +	return walk_res_desc(&iomem_resource, start, end, flags, desc, arg, func);
>> +}
>> +
>> +
>>    /**
>>     * walk_iomem_res_desc - Walks through iomem resources and calls func()
>>     *			 with matching resource ranges.
>> @@ -426,6 +443,26 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
>>    }
>>    EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
>>    
>> +#ifdef CONFIG_EFI_SOFT_RESERVE
>> +struct resource soft_reserve_resource = {
>> +	.name	= "Soft Reserved",
>> +	.start	= 0,
>> +	.end	= -1,
>> +	.desc	= IORES_DESC_SOFT_RESERVED,
>> +	.flags	= IORESOURCE_MEM,
>> +};
>> +EXPORT_SYMBOL_GPL(soft_reserve_resource);
>> +
>> +int walk_soft_reserve_res_desc(unsigned long desc, unsigned long flags,
>> +			       u64 start, u64 end, void *arg,
>> +			       int (*func)(struct resource *, void *))
>> +{
>> +	return walk_res_desc(&soft_reserve_resource, start, end, flags, desc,
>> +			     arg, func);
>> +}
>> +EXPORT_SYMBOL_GPL(walk_soft_reserve_res_desc);
>> +#endif
>> +
>>    /*
>>     * This function calls the @func callback against all memory ranges of type
>>     * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
>> @@ -648,6 +685,20 @@ int region_intersects(resource_size_t start, size_t size, unsigned long flags,
>>    }
>>    EXPORT_SYMBOL_GPL(region_intersects);
>>    
>> +int region_intersects_soft_reserve(resource_size_t start, size_t size,
>> +				   unsigned long flags, unsigned long desc)
> 
> 
> Shouldn't this function be implemented uder `#if CONFIG_EFI_SOFT_RESERVE`? Otherwise it may cause compilation failures when the config is disabled.

Fixed it.

Thanks
Smita
> 
> Thanks
> Zhijian