[PATCH v4] kho: validate preserved memory map during population

Pasha Tatashin posted 1 patch 1 month, 2 weeks ago
kernel/liveupdate/kexec_handover.c | 37 +++++++++++++++---------------
1 file changed, 19 insertions(+), 18 deletions(-)
[PATCH v4] kho: validate preserved memory map during population
Posted by Pasha Tatashin 1 month, 2 weeks ago
If the previous kernel enabled KHO but did not call kho_finalize()
(e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
the 'preserved-memory-map' property in the FDT remains empty/zero.

Previously, kho_populate() would succeed regardless of the memory map's
state, reserving the incoming scratch regions in memblock. However,
kho_memory_init() would later fail to deserialize the empty map. By that
time, the scratch regions were already registered, leading to partial
initialization and subsequent list corruption (freeing scratch area
twice) during kho_init().

Move the validation of the preserved memory map earlier into
kho_populate(). If the memory map is empty/NULL:
1. Abort kho_populate() immediately with -ENOENT.
2. Do not register or reserve the incoming scratch memory, allowing the new
   kernel to reclaim those pages as standard free memory.
3. Leave the global 'kho_in' state uninitialized.

Consequently, kho_memory_init() sees no active KHO context
(kho_in.mem_map_phys is 0) and falls back to kho_reserve_scratch(),
allocating fresh scratch memory as if it were a standard cold boot.

Fixes: de51999e687c ("kho: allow memory preservation state updates after finalization")
Reported-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
Closes: https://lore.kernel.org/all/20251218215613.GA17304@ranerica-svr.sc.intel.com
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Tested-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Changes v4:
- Addressed Tested-by
- Addressed review comments from Pratyush.

 kernel/liveupdate/kexec_handover.c | 37 +++++++++++++++---------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 9dc51fab604f..d4482b6e3cae 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -460,27 +460,23 @@ static void __init deserialize_bitmap(unsigned int order,
 	}
 }
 
-/* Return true if memory was deserizlied */
-static bool __init kho_mem_deserialize(const void *fdt)
+/* Returns physical address of the preserved memory map from FDT */
+static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
 {
-	struct khoser_mem_chunk *chunk;
 	const void *mem_ptr;
-	u64 mem;
 	int len;
 
 	mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
 	if (!mem_ptr || len != sizeof(u64)) {
 		pr_err("failed to get preserved memory bitmaps\n");
-		return false;
+		return 0;
 	}
 
-	mem = get_unaligned((const u64 *)mem_ptr);
-	chunk = mem ? phys_to_virt(mem) : NULL;
-
-	/* No preserved physical pages were passed, no deserialization */
-	if (!chunk)
-		return false;
+	return get_unaligned((const u64 *)mem_ptr);
+}
 
+static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
+{
 	while (chunk) {
 		unsigned int i;
 
@@ -489,8 +485,6 @@ static bool __init kho_mem_deserialize(const void *fdt)
 					   &chunk->bitmaps[i]);
 		chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
 	}
-
-	return true;
 }
 
 /*
@@ -1253,6 +1247,7 @@ bool kho_finalized(void)
 struct kho_in {
 	phys_addr_t fdt_phys;
 	phys_addr_t scratch_phys;
+	phys_addr_t mem_map_phys;
 	struct kho_debugfs dbg;
 };
 
@@ -1434,12 +1429,10 @@ static void __init kho_release_scratch(void)
 
 void __init kho_memory_init(void)
 {
-	if (kho_in.scratch_phys) {
+	if (kho_in.mem_map_phys) {
 		kho_scratch = phys_to_virt(kho_in.scratch_phys);
 		kho_release_scratch();
-
-		if (!kho_mem_deserialize(kho_get_fdt()))
-			kho_in.fdt_phys = 0;
+		kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys));
 	} else {
 		kho_reserve_scratch();
 	}
@@ -1448,8 +1441,9 @@ void __init kho_memory_init(void)
 void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 			 phys_addr_t scratch_phys, u64 scratch_len)
 {
-	void *fdt = NULL;
 	struct kho_scratch *scratch = NULL;
+	phys_addr_t mem_map_phys;
+	void *fdt = NULL;
 	int err = 0;
 	unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
 
@@ -1475,6 +1469,12 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 		goto out;
 	}
 
+	mem_map_phys = kho_get_mem_map_phys(fdt);
+	if (!mem_map_phys) {
+		err = -ENOENT;
+		goto out;
+	}
+
 	scratch = early_memremap(scratch_phys, scratch_len);
 	if (!scratch) {
 		pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
@@ -1515,6 +1515,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 
 	kho_in.fdt_phys = fdt_phys;
 	kho_in.scratch_phys = scratch_phys;
+	kho_in.mem_map_phys = mem_map_phys;
 	kho_scratch_cnt = scratch_cnt;
 	pr_info("found kexec handover data.\n");
 

base-commit: cc3aa43b44bdb43dfbac0fcb51c56594a11338a8
-- 
2.52.0.351.gbe84eed79e-goog
Re: [PATCH v4] kho: validate preserved memory map during population
Posted by Zhu Yanjun 2 weeks, 6 days ago
在 2025/12/23 6:01, Pasha Tatashin 写道:
> If the previous kernel enabled KHO but did not call kho_finalize()
> (e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
> the 'preserved-memory-map' property in the FDT remains empty/zero.
> 
> Previously, kho_populate() would succeed regardless of the memory map's
> state, reserving the incoming scratch regions in memblock. However,
> kho_memory_init() would later fail to deserialize the empty map. By that
> time, the scratch regions were already registered, leading to partial
> initialization and subsequent list corruption (freeing scratch area
> twice) during kho_init().
> 
> Move the validation of the preserved memory map earlier into
> kho_populate(). If the memory map is empty/NULL:
> 1. Abort kho_populate() immediately with -ENOENT.
> 2. Do not register or reserve the incoming scratch memory, allowing the new
>     kernel to reclaim those pages as standard free memory.
> 3. Leave the global 'kho_in' state uninitialized.
> 
> Consequently, kho_memory_init() sees no active KHO context
> (kho_in.mem_chunks_phys is 0) and falls back to kho_reserve_scratch(),
> allocating fresh scratch memory as if it were a standard cold boot.
> 
> Fixes: de51999e687c ("kho: allow memory preservation state updates after finalization")
> Reported-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> Closes: https://lore.kernel.org/all/20251218215613.GA17304@ranerica-svr.sc.intel.com
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> Tested-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> ---
> Changes v4:
> - Addressed Tested-by
> - Addressed review comments from Pratyush.
> 
>   kernel/liveupdate/kexec_handover.c | 37 +++++++++++++++---------------
>   1 file changed, 19 insertions(+), 18 deletions(-)
> 
> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> index 9dc51fab604f..d4482b6e3cae 100644
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -460,27 +460,23 @@ static void __init deserialize_bitmap(unsigned int order,
>   	}
>   }
>   
> -/* Return true if memory was deserizlied */
> -static bool __init kho_mem_deserialize(const void *fdt)
> +/* Returns physical address of the preserved memory map from FDT */
> +static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
>   {
> -	struct khoser_mem_chunk *chunk;
>   	const void *mem_ptr;
> -	u64 mem;
>   	int len;
>   
>   	mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
>   	if (!mem_ptr || len != sizeof(u64)) {
>   		pr_err("failed to get preserved memory bitmaps\n");
> -		return false;
> +		return 0;
>   	}
>   
> -	mem = get_unaligned((const u64 *)mem_ptr);
> -	chunk = mem ? phys_to_virt(mem) : NULL;
> -
> -	/* No preserved physical pages were passed, no deserialization */
> -	if (!chunk)
> -		return false;
> +	return get_unaligned((const u64 *)mem_ptr);
> +}
>   
> +static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
> +{
>   	while (chunk) {
>   		unsigned int i;
>   
> @@ -489,8 +485,6 @@ static bool __init kho_mem_deserialize(const void *fdt)
>   					   &chunk->bitmaps[i]);
>   		chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
>   	}
> -
> -	return true;
>   }
>   
>   /*
> @@ -1253,6 +1247,7 @@ bool kho_finalized(void)
>   struct kho_in {
>   	phys_addr_t fdt_phys;
>   	phys_addr_t scratch_phys;
> +	phys_addr_t mem_map_phys;
>   	struct kho_debugfs dbg;
>   };
>   
> @@ -1434,12 +1429,10 @@ static void __init kho_release_scratch(void)
>   
>   void __init kho_memory_init(void)
>   {
> -	if (kho_in.scratch_phys) {
> +	if (kho_in.mem_map_phys) {
>   		kho_scratch = phys_to_virt(kho_in.scratch_phys);
>   		kho_release_scratch();
> -
> -		if (!kho_mem_deserialize(kho_get_fdt()))
> -			kho_in.fdt_phys = 0;
> +		kho_mem_deserialize(phys_to_virt(kho_in.mem_map_phys));
>   	} else {
>   		kho_reserve_scratch();
>   	}
> @@ -1448,8 +1441,9 @@ void __init kho_memory_init(void)
>   void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>   			 phys_addr_t scratch_phys, u64 scratch_len)
>   {
> -	void *fdt = NULL;
>   	struct kho_scratch *scratch = NULL;
> +	phys_addr_t mem_map_phys;
> +	void *fdt = NULL;
>   	int err = 0;
>   	unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
>   
> @@ -1475,6 +1469,12 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>   		goto out;
>   	}
>   
> +	mem_map_phys = kho_get_mem_map_phys(fdt);
> +	if (!mem_map_phys) {
> +		err = -ENOENT;
> +		goto out;
> +	}
> +
>   	scratch = early_memremap(scratch_phys, scratch_len);
>   	if (!scratch) {
>   		pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
> @@ -1515,6 +1515,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>   
>   	kho_in.fdt_phys = fdt_phys;
>   	kho_in.scratch_phys = scratch_phys;
> +	kho_in.mem_map_phys = mem_map_phys;
>   	kho_scratch_cnt = scratch_cnt;
>   	pr_info("found kexec handover data.\n");
>   
> 
> base-commit: cc3aa43b44bdb43dfbac0fcb51c56594a11338a8
The base-commit is

commit cc3aa43b44bdb43dfbac0fcb51c56594a11338a8 (HEAD -> 
upstream/master, tag: next-20251219)
Author: Stephen Rothwell <sfr@canb.auug.org.au>
Date:   Fri Dec 19 14:12:19 2025 +1100

     Add linux-next specific files for 20251219

     Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>

Now this commit cannot be applied to 6.19-rc5.

Zhu Yanjun
Re: [PATCH v4] kho: validate preserved memory map during population
Posted by Breno Leitao 1 month ago
Hello Pasha,

On Tue, Dec 23, 2025 at 09:01:40AM -0500, Pasha Tatashin wrote:
> If the previous kernel enabled KHO but did not call kho_finalize()
> (e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
> the 'preserved-memory-map' property in the FDT remains empty/zero.
> 
> Previously, kho_populate() would succeed regardless of the memory map's
> state, reserving the incoming scratch regions in memblock. However,
> kho_memory_init() would later fail to deserialize the empty map. By that
> time, the scratch regions were already registered, leading to partial
> initialization and subsequent list corruption (freeing scratch area
> twice) during kho_init().

While trying my new patchset [0] on top of this patch, I got the
following issue:

	[    0.000000] KHO: disabling KHO revival: -2

Trying to solve it, I came up with a change in kho_get_mem_map_phys() to
distinguish between "no memory" and an error; see the patch below.

This is what I used to test [0] on top of linux-next. Is this useful?

Link: https://lore.kernel.org/all/20260108-kho-v3-1-b1d6b7a89342@debian.org/ [0]

thanks
--breno

commit 5d7855fede8110d74942e1b67056ba589a1cb54a
Author: Breno Leitao <leitao@debian.org>
Date:   Thu Jan 8 07:44:08 2026 -0800

    kho: allow KHO to work when no memory is preserved
    
    Fix KHO initialization failing when no memory pages were preserved by
    the previous kernel.
    
    Commit eda79a683a0a ("kho: validate preserved memory map during
    population") introduced kho_get_mem_map_phys() which returns the physical
    address of the preserved memory map directly as its return value. The
    caller then validates it with:
    
        mem_map_phys = kho_get_mem_map_phys(fdt);
        if (!mem_map_phys) {
            err = -ENOENT;
            goto out;
        }
    
    This creates an ambiguity: physical address 0 is used both as an error
    indicator (property missing/malformed) and as a valid value (property
    exists with value 0, meaning no memory was preserved).
    
    "No memory preserved" is a legitimate state. KHO provides features beyond
    memory page preservation, such as previous kernel version tracking and
    kexec count tracking. When the previous kernel enables KHO but doesn't
    preserve any memory pages, it sets 'preserved-memory-map' to 0. This is
    semantically different from "KHO not initialized" - it means "KHO is
    active, there's just nothing in the memory map."
    
    Before eda79a683a0a, the code handled this gracefully in
    kho_mem_deserialize():
    
        chunk = mem ? phys_to_virt(mem) : NULL;
        if (!chunk)
            return false;  // No pages, but KHO could still work
    
    After eda79a683a0a, the early validation conflated "no property" with
    "property value is 0", causing KHO to be completely disabled in both
    cases.
    
    Fix this by changing kho_get_mem_map_phys() to return an error code and
    pass the physical address via pointer. This allows distinguishing between:
     - Property missing/malformed: return -ENOENT (KHO fails)
     - Property exists with value 0: return 0 (KHO succeeds, no memory to
       restore)
    
    Fixes: eda79a683a0a ("kho: validate preserved memory map during population")
    Signed-off-by: Breno Leitao <leitao@debian.org>

diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 271d90198a08..3cf2dc6840c9 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -471,8 +471,8 @@ static void __init deserialize_bitmap(unsigned int order,
 	}
 }
 
-/* Returns physical address of the preserved memory map from FDT */
-static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
+/* Returns 0 on success and stores physical address in *phys_out */
+static int __init kho_get_mem_map_phys(const void *fdt, phys_addr_t *phys_out)
 {
 	const void *mem_ptr;
 	int len;
@@ -480,10 +480,11 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
 	mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
 	if (!mem_ptr || len != sizeof(u64)) {
 		pr_err("failed to get preserved memory bitmaps\n");
-		return 0;
+		return -ENOENT;
 	}
 
-	return get_unaligned((const u64 *)mem_ptr);
+	*phys_out = get_unaligned((const u64 *)mem_ptr);
+	return 0;
 }
 
 static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
@@ -1439,7 +1440,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 			 phys_addr_t scratch_phys, u64 scratch_len)
 {
 	struct kho_scratch *scratch = NULL;
-	phys_addr_t mem_map_phys;
+	phys_addr_t mem_map_phys = 0;
 	void *fdt = NULL;
 	int err = 0;
 	unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
@@ -1466,11 +1467,9 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 		goto out;
 	}
 
-	mem_map_phys = kho_get_mem_map_phys(fdt);
-	if (!mem_map_phys) {
-		err = -ENOENT;
+	err = kho_get_mem_map_phys(fdt, &mem_map_phys);
+	if (err)
 		goto out;
-	}
 
 	scratch = early_memremap(scratch_phys, scratch_len);
 	if (!scratch) {
Re: [PATCH v4] kho: validate preserved memory map during population
Posted by Pratyush Yadav 3 weeks, 1 day ago
Hi Breno,

On Thu, Jan 08 2026, Breno Leitao wrote:

> Hello Pasha,
>
> On Tue, Dec 23, 2025 at 09:01:40AM -0500, Pasha Tatashin wrote:
>> If the previous kernel enabled KHO but did not call kho_finalize()
>> (e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
>> the 'preserved-memory-map' property in the FDT remains empty/zero.
>> 
>> Previously, kho_populate() would succeed regardless of the memory map's
>> state, reserving the incoming scratch regions in memblock. However,
>> kho_memory_init() would later fail to deserialize the empty map. By that
>> time, the scratch regions were already registered, leading to partial
>> initialization and subsequent list corruption (freeing scratch area
>> twice) during kho_init().
>
> While trying my new patchset [0] on top of this patch, I got the
> following issue:
>
> 	[    0.000000] KHO: disabling KHO revival: -2
>
> Trying to solve it, I come up with a change in kho_get_mem_map_phys() to
> distinguish no memory and error, see the patch attached later.
>
> This is what I used to test [0] on top of linux-next. Is this useful?
>
> Link: https://lore.kernel.org/all/20260108-kho-v3-1-b1d6b7a89342@debian.org/ [0]
>
> thanks
> --breno
>
> commit 5d7855fede8110d74942e1b67056ba589a1cb54a
> Author: Breno Leitao <leitao@debian.org>
> Date:   Thu Jan 8 07:44:08 2026 -0800
>
>     kho: allow KHO to work when no memory is preserved
>     
>     Fix KHO initialization failing when no memory pages were preserved by
>     the previous kernel.
>     
>     Commit eda79a683a0a ("kho: validate preserved memory map during
>     population") introduced kho_get_mem_map_phys() which returns the physical
>     address of the preserved memory map directly as its return value. The
>     caller then validates it with:
>     
>         mem_map_phys = kho_get_mem_map_phys(fdt);
>         if (!mem_map_phys) {
>             err = -ENOENT;
>             goto out;
>         }
>     
>     This creates an ambiguity: physical address 0 is used both as an error
>     indicator (property missing/malformed) and as a valid value (property
>     exists with value 0, meaning no memory was preserved).
>     
>     "No memory preserved" is a legitimate state. KHO provides features beyond
>     memory page preservation, such as previous kernel version tracking and
>     kexec count tracking. When the previous kernel enables KHO but doesn't
>     preserve any memory pages, it sets 'preserved-memory-map' to 0. This is
>     semantically different from "KHO not initialized" - it means "KHO is
>     active, there's just nothing in the memory map."

This isn't true. If you hand over _any_ state, you will at least need
the KHO FDT. And the KHO FDT is preserved memory (see the
kho_alloc_preserve() call in kho_init()). So I don't see how you can
ever have valid KHO with no memory.

mem_map_phys _can_ be 0, but only when KHO was enabled but not used. And
that is of course also a valid use case.

We want to treat mem_map_phys == 0 the same as the error path, just
without the error print. This lets us discard all previous scratch areas
since they don't have anything useful anyway, and have a fresh start.

So while you are seeing this error message, I don't think it should
break anything and KHO should still be working fine. You can
double-check this by inspecting /sys/kernel/debug/kho/out.

So I think the patch is certainly a useful fix, it just needs some
re-wording and fixups.

Some comments on the code below.

>     
>     Before eda79a683a0a, the code handled this gracefully in
>     kho_mem_deserialize():
>     
>         chunk = mem ? phys_to_virt(mem) : NULL;
>         if (!chunk)
>             return false;  // No pages, but KHO could still work
>     
>     After eda79a683a0a, the early validation conflated "no property" with
>     "property value is 0", causing KHO to be completely disabled in both
>     cases.
>     
>     Fix this by changing kho_get_mem_map_phys() to return an error code and
>     pass the physical address via pointer. This allows distinguishing between:
>      - Property missing/malformed: return -ENOENT (KHO fails)
>      - Property exists with value 0: return 0 (KHO succeeds, no memory to
>        restore)
>     
>     Fixes: eda79a683a0a ("kho: validate preserved memory map during population")
>     Signed-off-by: Breno Leitao <leitao@debian.org>
>
> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> index 271d90198a08..3cf2dc6840c9 100644
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -471,8 +471,8 @@ static void __init deserialize_bitmap(unsigned int order,
>  	}
>  }
>  
> -/* Returns physical address of the preserved memory map from FDT */
> -static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
> +/* Returns 0 on success and stores physical address in *phys_out */
> +static int __init kho_get_mem_map_phys(const void *fdt, phys_addr_t *phys_out)
>  {
>  	const void *mem_ptr;
>  	int len;
> @@ -480,10 +480,11 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
>  	mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
>  	if (!mem_ptr || len != sizeof(u64)) {
>  		pr_err("failed to get preserved memory bitmaps\n");
> -		return 0;
> +		return -ENOENT;
>  	}
>  
> -	return get_unaligned((const u64 *)mem_ptr);
> +	*phys_out = get_unaligned((const u64 *)mem_ptr);
> +	return 0;
>  }
>  
>  static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
> @@ -1439,7 +1440,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>  			 phys_addr_t scratch_phys, u64 scratch_len)
>  {
>  	struct kho_scratch *scratch = NULL;
> -	phys_addr_t mem_map_phys;
> +	phys_addr_t mem_map_phys = 0;
>  	void *fdt = NULL;
>  	int err = 0;
>  	unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
> @@ -1466,11 +1467,9 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>  		goto out;
>  	}
>  
> -	mem_map_phys = kho_get_mem_map_phys(fdt);
> -	if (!mem_map_phys) {
> -		err = -ENOENT;
> +	err = kho_get_mem_map_phys(fdt, &mem_map_phys);
> +	if (err)

This will break when mem_map_phys == 0. As I explained earlier, when
that happens we want to discard all previous scratch info and start with
a clean slate.

Making this if (err || !mem_map_phys) should do the trick. The if (err)
check before the print should make sure the error message is not printed
when we have a valid property but its value is 0.

>  		goto out;
> -	}
>  
>  	scratch = early_memremap(scratch_phys, scratch_len);
>  	if (!scratch) {

-- 
Regards,
Pratyush Yadav
Re: [PATCH v4] kho: validate preserved memory map during population
Posted by Breno Leitao 2 weeks, 5 days ago
On Fri, Jan 16, 2026 at 04:21:28PM +0000, Pratyush Yadav wrote:
> > On Tue, Dec 23, 2025 at 09:01:40AM -0500, Pasha Tatashin wrote:
> >> If the previous kernel enabled KHO but did not call kho_finalize()
> >> (e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
> >> the 'preserved-memory-map' property in the FDT remains empty/zero.
> >> 
> >> Previously, kho_populate() would succeed regardless of the memory map's
> >> state, reserving the incoming scratch regions in memblock. However,
> >> kho_memory_init() would later fail to deserialize the empty map. By that
> >> time, the scratch regions were already registered, leading to partial
> >> initialization and subsequent list corruption (freeing scratch area
> >> twice) during kho_init().
> >
> > While trying my new patchset [0] on top of this patch, I got the
> > following issue:
> >
> > 	[    0.000000] KHO: disabling KHO revival: -2
> >
> > Trying to solve it, I come up with a change in kho_get_mem_map_phys() to
> > distinguish no memory and error, see the patch attached later.
> >
> > This is what I used to test [0] on top of linux-next. Is this useful?
> >
> > Link: https://lore.kernel.org/all/20260108-kho-v3-1-b1d6b7a89342@debian.org/ [0]
> >
> > thanks
> > --breno
> >
> > commit 5d7855fede8110d74942e1b67056ba589a1cb54a
> > Author: Breno Leitao <leitao@debian.org>
> > Date:   Thu Jan 8 07:44:08 2026 -0800
> >
> >     kho: allow KHO to work when no memory is preserved
> >     
> >     Fix KHO initialization failing when no memory pages were preserved by
> >     the previous kernel.
> >     
> >     Commit eda79a683a0a ("kho: validate preserved memory map during
> >     population") introduced kho_get_mem_map_phys() which returns the physical
> >     address of the preserved memory map directly as its return value. The
> >     caller then validates it with:
> >     
> >         mem_map_phys = kho_get_mem_map_phys(fdt);
> >         if (!mem_map_phys) {
> >             err = -ENOENT;
> >             goto out;
> >         }
> >     
> >     This creates an ambiguity: physical address 0 is used both as an error
> >     indicator (property missing/malformed) and as a valid value (property
> >     exists with value 0, meaning no memory was preserved).
> >     
> >     "No memory preserved" is a legitimate state. KHO provides features beyond
> >     memory page preservation, such as previous kernel version tracking and
> >     kexec count tracking. When the previous kernel enables KHO but doesn't
> >     preserve any memory pages, it sets 'preserved-memory-map' to 0. This is
> >     semantically different from "KHO not initialized" - it means "KHO is
> >     active, there's just nothing in the memory map."
> 
> This isn't true. If you hand over _any_ state, you will at least need
> the KHO FDT. And the KHO FDT is preserved memory (see the
> kho_alloc_preserve() call in kho_init()). So I don't see how you can
> ever have valid KHO with no memory.
> 
> mem_map_phys _can_ be 0, but only when KHO was enabled but not used. And
> that is of course also a valid use case.

Oh, I was not finalizing KHO, and commit e1c3bfd091f363c1
("kho: validate preserved memory map during population") started to fail this
case on purpose.

So, I understand we want to fail if mem_map_phys == 0, but the FDT was properly
passed (how is that possible)? I know I can read
KHO_PROP_PREVIOUS_RELEASE/KHO_PROP_PREVIOUS_RELEASE from the fdt, even when
mem_map_phys is 0.
Re: [PATCH v4] kho: validate preserved memory map during population
Posted by Pratyush Yadav 2 weeks, 4 days ago
On Mon, Jan 19 2026, Breno Leitao wrote:

> On Fri, Jan 16, 2026 at 04:21:28PM +0000, Pratyush Yadav wrote:
>> > On Tue, Dec 23, 2025 at 09:01:40AM -0500, Pasha Tatashin wrote:
>> >> If the previous kernel enabled KHO but did not call kho_finalize()
>> >> (e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
>> >> the 'preserved-memory-map' property in the FDT remains empty/zero.
>> >> 
>> >> Previously, kho_populate() would succeed regardless of the memory map's
>> >> state, reserving the incoming scratch regions in memblock. However,
>> >> kho_memory_init() would later fail to deserialize the empty map. By that
>> >> time, the scratch regions were already registered, leading to partial
>> >> initialization and subsequent list corruption (freeing scratch area
>> >> twice) during kho_init().
>> >
>> > While trying my new patchset [0] on top of this patch, I got the
>> > following issue:
>> >
>> > 	[    0.000000] KHO: disabling KHO revival: -2
>> >
>> > Trying to solve it, I come up with a change in kho_get_mem_map_phys() to
>> > distinguish no memory and error, see the patch attached later.
>> >
>> > This is what I used to test [0] on top of linux-next. Is this useful?
>> >
>> > Link: https://lore.kernel.org/all/20260108-kho-v3-1-b1d6b7a89342@debian.org/ [0]
>> >
>> > thanks
>> > --breno
>> >
>> > commit 5d7855fede8110d74942e1b67056ba589a1cb54a
>> > Author: Breno Leitao <leitao@debian.org>
>> > Date:   Thu Jan 8 07:44:08 2026 -0800
>> >
>> >     kho: allow KHO to work when no memory is preserved
>> >     
>> >     Fix KHO initialization failing when no memory pages were preserved by
>> >     the previous kernel.
>> >     
>> >     Commit eda79a683a0a ("kho: validate preserved memory map during
>> >     population") introduced kho_get_mem_map_phys() which returns the physical
>> >     address of the preserved memory map directly as its return value. The
>> >     caller then validates it with:
>> >     
>> >         mem_map_phys = kho_get_mem_map_phys(fdt);
>> >         if (!mem_map_phys) {
>> >             err = -ENOENT;
>> >             goto out;
>> >         }
>> >     
>> >     This creates an ambiguity: physical address 0 is used both as an error
>> >     indicator (property missing/malformed) and as a valid value (property
>> >     exists with value 0, meaning no memory was preserved).
>> >     
>> >     "No memory preserved" is a legitimate state. KHO provides features beyond
>> >     memory page preservation, such as previous kernel version tracking and
>> >     kexec count tracking. When the previous kernel enables KHO but doesn't
>> >     preserve any memory pages, it sets 'preserved-memory-map' to 0. This is
>> >     semantically different from "KHO not initialized" - it means "KHO is
>> >     active, there's just nothing in the memory map."
>> 
>> This isn't true. If you hand over _any_ state, you will at least need
>> the KHO FDT. And the KHO FDT is preserved memory (see the
>> kho_alloc_preserve() call in kho_init()). So I don't see how you can
>> ever have valid KHO with no memory.
>> 
>> mem_map_phys _can_ be 0, but only when KHO was enabled but not used. And
>> that is of course also a valid use case.
>
> Oh, I was not finalizing KHO, and in commit e1c3bfd091f363c1
> ("kho: validate preserved memory map during population") started to fail this
> on purpose.
>
> So, I understand we want to fail if mem_map_phys = 0, but thn FDT was properly
> passed (how is it possible)? I know I can read
> KHO_PROP_PREVIOUS_RELEASE/KHO_PROP_PREVIOUS_RELEASE from the fdt, even when
> mem_map_phys is 0.

See commit d7255959b69a ("kho: allow kexec load before KHO
finalization") for more of an explanation of why this is done.

The FDT is always handed over because when the KHO image is set up, we
don't know if KHO will actually be used. On the next boot, it is
discarded if KHO was not finalized (that is, mem_map_phys == 0) since it
won't contain any memory to hand over.

Since you had the previous release in the FDT itself, you were able to
read it. If you move to subtrees as I suggested, then it would stop
working.

The KHO FDT is special in a way. We access it _before_ going into
scratch-only mode, and until then we just have to hope nobody
over-writes that (which is unlikely in early boot anyway). So it is safe
to use the contents of the FDT itself to signal if we have any useful
KHO state to hand over.

-- 
Regards,
Pratyush Yadav
Re: [PATCH v4] kho: validate preserved memory map during population
Posted by Mike Rapoport 3 weeks ago
On Fri, Jan 16, 2026 at 04:21:28PM +0000, Pratyush Yadav wrote:
> Hi Breno,
> 
> On Thu, Jan 08 2026, Breno Leitao wrote:
> 
> > Hello Pasha,
> >
> > On Tue, Dec 23, 2025 at 09:01:40AM -0500, Pasha Tatashin wrote:
> >> If the previous kernel enabled KHO but did not call kho_finalize()
> >> (e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
> >> the 'preserved-memory-map' property in the FDT remains empty/zero.
> >> 
> >> Previously, kho_populate() would succeed regardless of the memory map's
> >> state, reserving the incoming scratch regions in memblock. However,
> >> kho_memory_init() would later fail to deserialize the empty map. By that
> >> time, the scratch regions were already registered, leading to partial
> >> initialization and subsequent list corruption (freeing scratch area
> >> twice) during kho_init().
> >
> > While trying my new patchset [0] on top of this patch, I got the
> > following issue:
> >
> > 	[    0.000000] KHO: disabling KHO revival: -2
> >
> > Trying to solve it, I come up with a change in kho_get_mem_map_phys() to
> > distinguish no memory and error, see the patch attached later.
> >
> > This is what I used to test [0] on top of linux-next. Is this useful?
> >
> > Link: https://lore.kernel.org/all/20260108-kho-v3-1-b1d6b7a89342@debian.org/ [0]
> >
> > thanks
> > --breno
> >
> > commit 5d7855fede8110d74942e1b67056ba589a1cb54a
> > Author: Breno Leitao <leitao@debian.org>
> > Date:   Thu Jan 8 07:44:08 2026 -0800
> >
> >     kho: allow KHO to work when no memory is preserved
> >     
> >     Fix KHO initialization failing when no memory pages were preserved by
> >     the previous kernel.
> >     
> >     Commit eda79a683a0a ("kho: validate preserved memory map during
> >     population") introduced kho_get_mem_map_phys() which returns the physical
> >     address of the preserved memory map directly as its return value. The
> >     caller then validates it with:
> >     
> >         mem_map_phys = kho_get_mem_map_phys(fdt);
> >         if (!mem_map_phys) {
> >             err = -ENOENT;
> >             goto out;
> >         }
> >     
> >     This creates an ambiguity: physical address 0 is used both as an error
> >     indicator (property missing/malformed) and as a valid value (property
> >     exists with value 0, meaning no memory was preserved).
> >     
> >     "No memory preserved" is a legitimate state. KHO provides features beyond
> >     memory page preservation, such as previous kernel version tracking and
> >     kexec count tracking. When the previous kernel enables KHO but doesn't
> >     preserve any memory pages, it sets 'preserved-memory-map' to 0. This is
> >     semantically different from "KHO not initialized" - it means "KHO is
> >     active, there's just nothing in the memory map."
> 
> This isn't true. If you hand over _any_ state, you will at least need
> the KHO FDT. And the KHO FDT is preserved memory (see the
> kho_alloc_preserve() call in kho_init()). So I don't see how you can
> ever have valid KHO with no memory.
> 
> mem_map_phys _can_ be 0, but only when KHO was enabled but not used. And
> that is of course also a valid use case.
> 
> We want to treat mem_map_phys == 0 the same as the error path, just
> without the error print. This lets us discard all previous scratch areas
> since they don't have anything useful anyway, and have a fresh start.
> 
> So while you are seeing this error message, I don't think it should
> break anything and KHO should still be working fine. You can
> double-check this by inspecting /sys/kernel/debug/kho/out.
> 
> So I think the patch is certainly a useful fix, it just needs some
> re-wording and fixups.
> 
> Some comments on the code below.
> 
> >     
> >     Before eda79a683a0a, the code handled this gracefully in
> >     kho_mem_deserialize():
> >     
> >         chunk = mem ? phys_to_virt(mem) : NULL;
> >         if (!chunk)
> >             return false;  // No pages, but KHO could still work
> >     
> >     After eda79a683a0a, the early validation conflated "no property" with
> >     "property value is 0", causing KHO to be completely disabled in both
> >     cases.
> >     
> >     Fix this by changing kho_get_mem_map_phys() to return an error code and
> >     pass the physical address via pointer. This allows distinguishing between:
> >      - Property missing/malformed: return -ENOENT (KHO fails)
> >      - Property exists with value 0: return 0 (KHO succeeds, no memory to
> >        restore)
> >     
> >     Fixes: eda79a683a0a ("kho: validate preserved memory map during population")
> >     Signed-off-by: Breno Leitao <leitao@debian.org>
> >
> > diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> > index 271d90198a08..3cf2dc6840c9 100644
> > --- a/kernel/liveupdate/kexec_handover.c
> > +++ b/kernel/liveupdate/kexec_handover.c
> > @@ -471,8 +471,8 @@ static void __init deserialize_bitmap(unsigned int order,
> >  	}
> >  }
> >  
> > -/* Returns physical address of the preserved memory map from FDT */
> > -static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
> > +/* Returns 0 on success and stores physical address in *phys_out */
> > +static int __init kho_get_mem_map_phys(const void *fdt, phys_addr_t *phys_out)
> >  {
> >  	const void *mem_ptr;
> >  	int len;
> > @@ -480,10 +480,11 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
> >  	mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
> >  	if (!mem_ptr || len != sizeof(u64)) {
> >  		pr_err("failed to get preserved memory bitmaps\n");
> > -		return 0;
> > +		return -ENOENT;
> >  	}
> >  
> > -	return get_unaligned((const u64 *)mem_ptr);
> > +	*phys_out = get_unaligned((const u64 *)mem_ptr);
> > +	return 0;
> >  }
> >  
> >  static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
> > @@ -1439,7 +1440,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> >  			 phys_addr_t scratch_phys, u64 scratch_len)
> >  {
> >  	struct kho_scratch *scratch = NULL;
> > -	phys_addr_t mem_map_phys;
> > +	phys_addr_t mem_map_phys = 0;
> >  	void *fdt = NULL;
> >  	int err = 0;
> >  	unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
> > @@ -1466,11 +1467,9 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> >  		goto out;
> >  	}
> >  
> > -	mem_map_phys = kho_get_mem_map_phys(fdt);
> > -	if (!mem_map_phys) {
> > -		err = -ENOENT;
> > +	err = kho_get_mem_map_phys(fdt, &mem_map_phys);
> > +	if (err)
> 
> This will break when mem_map_phys == 0. As I explained earlier, when
> that happens we want to discard all previous scratch info and start with
> a clean slate.
> 
> Making this if (err || !mem_map_phys) should do the trick. The if (err)
> check before the print should make sure the error message is not printed
> when we have a valid property but its value is 0.

While we are at it, I'd suggest changing kho_populate() error handling to
use dedicated goto labels (i.e., like below).
Then a simple if (err) will do and that's much clearer.

Another thing I noticed is that assigning err to -EFAULT or -EINVAL after
printks is completely redundant, since we anyway report what went wrong, so
printing the error value in the end is just not needed.

diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index feffeafa51b7..2bba111149c4 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1453,27 +1453,27 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 	if (!fdt) {
 		pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys);
 		err = -EFAULT;
-		goto out;
+		goto err_report;
 	}
 	err = fdt_check_header(fdt);
 	if (err) {
 		pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n",
 			fdt_phys, err);
 		err = -EINVAL;
-		goto out;
+		goto err_unmap_fdt;
 	}
 	err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE);
 	if (err) {
 		pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n",
 			fdt_phys, KHO_FDT_COMPATIBLE, err);
 		err = -EINVAL;
-		goto out;
+		goto err_unmap_fdt;
 	}
 
 	mem_map_phys = kho_get_mem_map_phys(fdt);
 	if (!mem_map_phys) {
 		err = -ENOENT;
-		goto out;
+		goto err_unmap_fdt;
 	}
 
 	scratch = early_memremap(scratch_phys, scratch_len);
@@ -1481,7 +1481,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 		pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
 			scratch_phys, scratch_len);
 		err = -EFAULT;
-		goto out;
+		goto err_unmap_scratch;
 	}
 
 	/*
@@ -1498,7 +1498,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 		if (WARN_ON(err)) {
 			pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe",
 				&area->addr, &size, ERR_PTR(err));
-			goto out;
+			goto err_unmap_scratch;
 		}
 		pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size);
 	}
@@ -1520,13 +1520,14 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
 	kho_scratch_cnt = scratch_cnt;
 	pr_info("found kexec handover data.\n");
 
-out:
-	if (fdt)
-		early_memunmap(fdt, fdt_len);
-	if (scratch)
-		early_memunmap(scratch, scratch_len);
-	if (err)
-		pr_warn("disabling KHO revival: %d\n", err);
+	return;
+
+err_unmap_scratch:
+	early_memunmap(scratch, scratch_len);
+err_unmap_fdt:
+	early_memunmap(fdt, fdt_len);
+err_report:
+	pr_warn("disabling KHO revival: %d\n", err);
 }
 
 /* Helper functions for kexec_file_load */
 
> >  		goto out;
> > -	}
> >  
> >  	scratch = early_memremap(scratch_phys, scratch_len);
> >  	if (!scratch) {
> 
> -- 
> Regards,
> Pratyush Yadav

-- 
Sincerely yours,
Mike.
Re: [PATCH v4] kho: validate preserved memory map during population
Posted by Pratyush Yadav 2 weeks, 4 days ago
On Sun, Jan 18 2026, Mike Rapoport wrote:

> On Fri, Jan 16, 2026 at 04:21:28PM +0000, Pratyush Yadav wrote:
>> Hi Breno,
>> 
>> On Thu, Jan 08 2026, Breno Leitao wrote:
>> 
>> > Hello Pasha,
>> >
>> > On Tue, Dec 23, 2025 at 09:01:40AM -0500, Pasha Tatashin wrote:
>> >> If the previous kernel enabled KHO but did not call kho_finalize()
>> >> (e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
>> >> the 'preserved-memory-map' property in the FDT remains empty/zero.
>> >> 
>> >> Previously, kho_populate() would succeed regardless of the memory map's
>> >> state, reserving the incoming scratch regions in memblock. However,
>> >> kho_memory_init() would later fail to deserialize the empty map. By that
>> >> time, the scratch regions were already registered, leading to partial
>> >> initialization and subsequent list corruption (freeing scratch area
>> >> twice) during kho_init().
>> >
>> > While trying my new patchset [0] on top of this patch, I got the
>> > following issue:
>> >
>> > 	[    0.000000] KHO: disabling KHO revival: -2
>> >
>> > Trying to solve it, I come up with a change in kho_get_mem_map_phys() to
>> > distinguish no memory and error, see the patch attached later.
>> >
>> > This is what I used to test [0] on top of linux-next. Is this useful?
>> >
>> > Link: https://lore.kernel.org/all/20260108-kho-v3-1-b1d6b7a89342@debian.org/ [0]
>> >
>> > thanks
>> > --breno
>> >
>> > commit 5d7855fede8110d74942e1b67056ba589a1cb54a
>> > Author: Breno Leitao <leitao@debian.org>
>> > Date:   Thu Jan 8 07:44:08 2026 -0800
>> >
>> >     kho: allow KHO to work when no memory is preserved
>> >     
>> >     Fix KHO initialization failing when no memory pages were preserved by
>> >     the previous kernel.
>> >     
>> >     Commit eda79a683a0a ("kho: validate preserved memory map during
>> >     population") introduced kho_get_mem_map_phys() which returns the physical
>> >     address of the preserved memory map directly as its return value. The
>> >     caller then validates it with:
>> >     
>> >         mem_map_phys = kho_get_mem_map_phys(fdt);
>> >         if (!mem_map_phys) {
>> >             err = -ENOENT;
>> >             goto out;
>> >         }
>> >     
>> >     This creates an ambiguity: physical address 0 is used both as an error
>> >     indicator (property missing/malformed) and as a valid value (property
>> >     exists with value 0, meaning no memory was preserved).
>> >     
>> >     "No memory preserved" is a legitimate state. KHO provides features beyond
>> >     memory page preservation, such as previous kernel version tracking and
>> >     kexec count tracking. When the previous kernel enables KHO but doesn't
>> >     preserve any memory pages, it sets 'preserved-memory-map' to 0. This is
>> >     semantically different from "KHO not initialized" - it means "KHO is
>> >     active, there's just nothing in the memory map."
>> 
>> This isn't true. If you hand over _any_ state, you will at least need
>> the KHO FDT. And the KHO FDT is preserved memory (see the
>> kho_alloc_preserve() call in kho_init()). So I don't see how you can
>> ever have valid KHO with no memory.
>> 
>> mem_map_phys _can_ be 0, but only when KHO was enabled but not used. And
>> that is of course also a valid use case.
>> 
>> We want to treat mem_map_phys == 0 the same as the error path, just
>> without the error print. This lets us discard all previous scratch areas
>> since they don't have anything useful anyway, and have a fresh start.
>> 
>> So while you are seeing this error message, I don't think it should
>> break anything and KHO should still be working fine. You can
>> double-check this by inspecting /sys/kernel/debug/kho/out.
>> 
>> So I think the patch is certainly a useful fix, it just needs some
>> re-wording and fixups.
>> 
>> Some comments on the code below.
>> 
>> >     
>> >     Before eda79a683a0a, the code handled this gracefully in
>> >     kho_mem_deserialize():
>> >     
>> >         chunk = mem ? phys_to_virt(mem) : NULL;
>> >         if (!chunk)
>> >             return false;  // No pages, but KHO could still work
>> >     
>> >     After eda79a683a0a, the early validation conflated "no property" with
>> >     "property value is 0", causing KHO to be completely disabled in both
>> >     cases.
>> >     
>> >     Fix this by changing kho_get_mem_map_phys() to return an error code and
>> >     pass the physical address via pointer. This allows distinguishing between:
>> >      - Property missing/malformed: return -ENOENT (KHO fails)
>> >      - Property exists with value 0: return 0 (KHO succeeds, no memory to
>> >        restore)
>> >     
>> >     Fixes: eda79a683a0a ("kho: validate preserved memory map during population")
>> >     Signed-off-by: Breno Leitao <leitao@debian.org>
>> >
>> > diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
>> > index 271d90198a08..3cf2dc6840c9 100644
>> > --- a/kernel/liveupdate/kexec_handover.c
>> > +++ b/kernel/liveupdate/kexec_handover.c
>> > @@ -471,8 +471,8 @@ static void __init deserialize_bitmap(unsigned int order,
>> >  	}
>> >  }
>> >  
>> > -/* Returns physical address of the preserved memory map from FDT */
>> > -static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
>> > +/* Returns 0 on success and stores physical address in *phys_out */
>> > +static int __init kho_get_mem_map_phys(const void *fdt, phys_addr_t *phys_out)
>> >  {
>> >  	const void *mem_ptr;
>> >  	int len;
>> > @@ -480,10 +480,11 @@ static phys_addr_t __init kho_get_mem_map_phys(const void *fdt)
>> >  	mem_ptr = fdt_getprop(fdt, 0, KHO_FDT_MEMORY_MAP_PROP_NAME, &len);
>> >  	if (!mem_ptr || len != sizeof(u64)) {
>> >  		pr_err("failed to get preserved memory bitmaps\n");
>> > -		return 0;
>> > +		return -ENOENT;
>> >  	}
>> >  
>> > -	return get_unaligned((const u64 *)mem_ptr);
>> > +	*phys_out = get_unaligned((const u64 *)mem_ptr);
>> > +	return 0;
>> >  }
>> >  
>> >  static void __init kho_mem_deserialize(struct khoser_mem_chunk *chunk)
>> > @@ -1439,7 +1440,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>> >  			 phys_addr_t scratch_phys, u64 scratch_len)
>> >  {
>> >  	struct kho_scratch *scratch = NULL;
>> > -	phys_addr_t mem_map_phys;
>> > +	phys_addr_t mem_map_phys = 0;
>> >  	void *fdt = NULL;
>> >  	int err = 0;
>> >  	unsigned int scratch_cnt = scratch_len / sizeof(*kho_scratch);
>> > @@ -1466,11 +1467,9 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>> >  		goto out;
>> >  	}
>> >  
>> > -	mem_map_phys = kho_get_mem_map_phys(fdt);
>> > -	if (!mem_map_phys) {
>> > -		err = -ENOENT;
>> > +	err = kho_get_mem_map_phys(fdt, &mem_map_phys);
>> > +	if (err)
>> 
>> This will break when mem_map_phys == 0. As I explained earlier, when
>> that happens we want to discard all previous scratch info and start with
>> a clean slate.
>> 
>> Making this if (err || !mem_map_phys) should do the trick. The if (err)
>> check before the print should make sure the error message is not printed
>> when we have a valid property but its value is 0.
>
> While we are on it, I'd suggest to change kho_populate() error handling to
> use goto, (i.e like below)
> Then a simple if (err) will do and that's much clearer.

Yeah, looks like a good cleanup.

>
> Another thing I noticed it that assigning err to -EFAULT or -EINVAL after
> printks is completely redundant, since we anyway report what went wrong, so
> printing the error value in the end just not needed.
>
> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> index feffeafa51b7..2bba111149c4 100644
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -1453,27 +1453,27 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>  	if (!fdt) {
>  		pr_warn("setup: failed to memremap FDT (0x%llx)\n", fdt_phys);
>  		err = -EFAULT;
> -		goto out;
> +		goto err_report;
>  	}
>  	err = fdt_check_header(fdt);
>  	if (err) {
>  		pr_warn("setup: handover FDT (0x%llx) is invalid: %d\n",
>  			fdt_phys, err);
>  		err = -EINVAL;
> -		goto out;
> +		goto err_unmap_fdt;
>  	}
>  	err = fdt_node_check_compatible(fdt, 0, KHO_FDT_COMPATIBLE);
>  	if (err) {
>  		pr_warn("setup: handover FDT (0x%llx) is incompatible with '%s': %d\n",
>  			fdt_phys, KHO_FDT_COMPATIBLE, err);
>  		err = -EINVAL;
> -		goto out;
> +		goto err_unmap_fdt;
>  	}
>  
>  	mem_map_phys = kho_get_mem_map_phys(fdt);
>  	if (!mem_map_phys) {
>  		err = -ENOENT;
> -		goto out;
> +		goto err_unmap_fdt;
>  	}
>  
>  	scratch = early_memremap(scratch_phys, scratch_len);
> @@ -1481,7 +1481,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>  		pr_warn("setup: failed to memremap scratch (phys=0x%llx, len=%lld)\n",
>  			scratch_phys, scratch_len);
>  		err = -EFAULT;
> -		goto out;
> +		goto err_unmap_scratch;
>  	}
>  
>  	/*
> @@ -1498,7 +1498,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>  		if (WARN_ON(err)) {
>  			pr_warn("failed to mark the scratch region 0x%pa+0x%pa: %pe",
>  				&area->addr, &size, ERR_PTR(err));
> -			goto out;
> +			goto err_unmap_scratch;
>  		}
>  		pr_debug("Marked 0x%pa+0x%pa as scratch", &area->addr, &size);
>  	}
> @@ -1520,13 +1520,14 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>  	kho_scratch_cnt = scratch_cnt;
>  	pr_info("found kexec handover data.\n");
>  
> -out:
> -	if (fdt)
> -		early_memunmap(fdt, fdt_len);
> -	if (scratch)
> -		early_memunmap(scratch, scratch_len);
> -	if (err)
> -		pr_warn("disabling KHO revival: %d\n", err);
> +	return;
> +
> +err_unmap_scratch:
> +	early_memunmap(scratch, scratch_len);
> +err_unmap_fdt:
> +	early_memunmap(fdt, fdt_len);
> +err_report:
> +	pr_warn("disabling KHO revival: %d\n", err);
>  }
>  
>  /* Helper functions for kexec_file_load */
>  
>> >  		goto out;
>> > -	}
>> >  
>> >  	scratch = early_memremap(scratch_phys, scratch_len);
>> >  	if (!scratch) {
>> 
>> -- 
>> Regards,
>> Pratyush Yadav

-- 
Regards,
Pratyush Yadav
Re: [PATCH v4] kho: validate preserved memory map during population
Posted by Pratyush Yadav 1 month, 2 weeks ago
On Tue, Dec 23 2025, Pasha Tatashin wrote:

> If the previous kernel enabled KHO but did not call kho_finalize()
> (e.g., CONFIG_LIVEUPDATE=n or userspace skipped the finalization step),
> the 'preserved-memory-map' property in the FDT remains empty/zero.
>
> Previously, kho_populate() would succeed regardless of the memory map's
> state, reserving the incoming scratch regions in memblock. However,
> kho_memory_init() would later fail to deserialize the empty map. By that
> time, the scratch regions were already registered, leading to partial
> initialization and subsequent list corruption (freeing scratch area
> twice) during kho_init().
>
> Move the validation of the preserved memory map earlier into
> kho_populate(). If the memory map is empty/NULL:
> 1. Abort kho_populate() immediately with -ENOENT.
> 2. Do not register or reserve the incoming scratch memory, allowing the new
>    kernel to reclaim those pages as standard free memory.
> 3. Leave the global 'kho_in' state uninitialized.
>
> Consequently, kho_memory_init() sees no active KHO context
> (kho_in.mem_chunks_phys is 0) and falls back to kho_reserve_scratch(),
> allocating fresh scratch memory as if it were a standard cold boot.
>
> Fixes: de51999e687c ("kho: allow memory preservation state updates after finalization")
> Reported-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> Closes: https://lore.kernel.org/all/20251218215613.GA17304@ranerica-svr.sc.intel.com
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> Reviewed-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> Tested-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>

Reviewed-by: Pratyush Yadav <pratyush@kernel.org>

[...]

-- 
Regards,
Pratyush Yadav