During boot, kho_restore_folio() relies on the memory map having been
successfully deserialized. If deserialization fails or no map is present,
attempting to restore the FDT folio is unsafe.
Update kho_mem_deserialize() to return a boolean indicating success. Use
this return value in kho_memory_init() to disable KHO if deserialization
fails. Also, the incoming FDT folio is never used, there is no reason to
restore it.
Additionally, use memcpy() to retrieve the memory map pointer from the FDT.
FDT properties are not guaranteed to be naturally aligned, and accessing
a 64-bit value via a pointer that is only 32-bit aligned can cause faults.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
kernel/liveupdate/kexec_handover.c | 32 ++++++++++++++++++------------
1 file changed, 19 insertions(+), 13 deletions(-)
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index a4b33ca79246..83aca3b4af15 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -450,20 +450,28 @@ static void __init deserialize_bitmap(unsigned int order,
}
}
-static void __init kho_mem_deserialize(const void *fdt)
+/* Return true if memory was deserizlied */
+static bool __init kho_mem_deserialize(const void *fdt)
{
struct khoser_mem_chunk *chunk;
- const phys_addr_t *mem;
+ const void *mem_ptr;
+ u64 mem;
int len;
- mem = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
-
- if (!mem || len != sizeof(*mem)) {
+ mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
+ if (!mem_ptr || len != sizeof(u64)) {
pr_err("failed to get preserved memory bitmaps\n");
- return;
+ return false;
}
+ /* FDT guarantees 32-bit alignment, have to use memcpy */
+ memcpy(&mem, mem_ptr, len);
+
+ chunk = mem ? phys_to_virt(mem) : NULL;
+
+ /* No preserved physical pages were passed, no deserialization */
+ if (!chunk)
+ return false;
- chunk = *mem ? phys_to_virt(*mem) : NULL;
while (chunk) {
unsigned int i;
@@ -472,6 +480,8 @@ static void __init kho_mem_deserialize(const void *fdt)
&chunk->bitmaps[i]);
chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
}
+
+ return true;
}
/*
@@ -1377,16 +1387,12 @@ static void __init kho_release_scratch(void)
void __init kho_memory_init(void)
{
- struct folio *folio;
-
if (kho_in.scratch_phys) {
kho_scratch = phys_to_virt(kho_in.scratch_phys);
kho_release_scratch();
- kho_mem_deserialize(kho_get_fdt());
- folio = kho_restore_folio(kho_in.fdt_phys);
- if (!folio)
- pr_warn("failed to restore folio for KHO fdt\n");
+ if (!kho_mem_deserialize(kho_get_fdt()))
+ kho_in.fdt_phys = 0;
} else {
kho_reserve_scratch();
}
--
2.52.0.rc1.455.g30608eb744-goog
On Fri, Nov 14 2025, Pasha Tatashin wrote:
> During boot, kho_restore_folio() relies on the memory map having been
> successfully deserialized. If deserialization fails or no map is present,
> attempting to restore the FDT folio is unsafe.
>
> Update kho_mem_deserialize() to return a boolean indicating success. Use
> this return value in kho_memory_init() to disable KHO if deserialization
> fails. Also, the incoming FDT folio is never used, there is no reason to
> restore it.
>
> Additionally, use memcpy() to retrieve the memory map pointer from the FDT.
> FDT properties are not guaranteed to be naturally aligned, and accessing
> a 64-bit value via a pointer that is only 32-bit aligned can cause faults.
>
> Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> ---
> kernel/liveupdate/kexec_handover.c | 32 ++++++++++++++++++------------
> 1 file changed, 19 insertions(+), 13 deletions(-)
>
> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> index a4b33ca79246..83aca3b4af15 100644
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -450,20 +450,28 @@ static void __init deserialize_bitmap(unsigned int order,
> }
> }
>
> -static void __init kho_mem_deserialize(const void *fdt)
> +/* Return true if memory was deserizlied */
> +static bool __init kho_mem_deserialize(const void *fdt)
> {
> struct khoser_mem_chunk *chunk;
> - const phys_addr_t *mem;
> + const void *mem_ptr;
> + u64 mem;
> int len;
>
> - mem = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
> -
> - if (!mem || len != sizeof(*mem)) {
> + mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
> + if (!mem_ptr || len != sizeof(u64)) {
> pr_err("failed to get preserved memory bitmaps\n");
> - return;
> + return false;
> }
> + /* FDT guarantees 32-bit alignment, have to use memcpy */
> + memcpy(&mem, mem_ptr, len);
Perhaps get_unaligned(mem) would have been simpler?
> +
> + chunk = mem ? phys_to_virt(mem) : NULL;
> +
> + /* No preserved physical pages were passed, no deserialization */
> + if (!chunk)
> + return false;
Should we disallow all kho_restore_{folio,pages}() calls too if this
fails? Ideally those should never happen since kho_retrieve_subtree()
will fail, so maybe as a debug aid?
>
> - chunk = *mem ? phys_to_virt(*mem) : NULL;
> while (chunk) {
> unsigned int i;
>
> @@ -472,6 +480,8 @@ static void __init kho_mem_deserialize(const void *fdt)
> &chunk->bitmaps[i]);
> chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
> }
> +
> + return true;
> }
>
> /*
> @@ -1377,16 +1387,12 @@ static void __init kho_release_scratch(void)
>
> void __init kho_memory_init(void)
> {
> - struct folio *folio;
> -
> if (kho_in.scratch_phys) {
> kho_scratch = phys_to_virt(kho_in.scratch_phys);
> kho_release_scratch();
>
> - kho_mem_deserialize(kho_get_fdt());
> - folio = kho_restore_folio(kho_in.fdt_phys);
> - if (!folio)
> - pr_warn("failed to restore folio for KHO fdt\n");
> + if (!kho_mem_deserialize(kho_get_fdt()))
> + kho_in.fdt_phys = 0;
The folio restore does serve a purpose: it accounts for that folio in
the system's total memory. See the call to adjust_managed_page_count()
in kho_restore_page(). In practice, I don't think it makes much of a
difference, but I don't see why not.
> } else {
> kho_reserve_scratch();
> }
--
Regards,
Pratyush Yadav
On Fri, Nov 14, 2025 at 05:52:37PM +0100, Pratyush Yadav wrote:
> On Fri, Nov 14 2025, Pasha Tatashin wrote:
>
> > @@ -1377,16 +1387,12 @@ static void __init kho_release_scratch(void)
> >
> > void __init kho_memory_init(void)
> > {
> > - struct folio *folio;
> > -
> > if (kho_in.scratch_phys) {
> > kho_scratch = phys_to_virt(kho_in.scratch_phys);
> > kho_release_scratch();
> >
> > - kho_mem_deserialize(kho_get_fdt());
> > - folio = kho_restore_folio(kho_in.fdt_phys);
> > - if (!folio)
> > - pr_warn("failed to restore folio for KHO fdt\n");
> > + if (!kho_mem_deserialize(kho_get_fdt()))
> > + kho_in.fdt_phys = 0;
>
> The folio restore does serve a purpose: it accounts for that folio in
> the system's total memory. See the call to adjust_managed_page_count()
> in kho_restore_page(). In practice, I don't think it makes much of a
> difference, but I don't see why not.
This page is never freed, so adding it to zone managed pages or keeping it
reserved does not change anything.
> > } else {
> > kho_reserve_scratch();
> > }
>
> --
> Regards,
> Pratyush Yadav
--
Sincerely yours,
Mike.
On Sat, Nov 15 2025, Mike Rapoport wrote:
> On Fri, Nov 14, 2025 at 05:52:37PM +0100, Pratyush Yadav wrote:
>> On Fri, Nov 14 2025, Pasha Tatashin wrote:
>>
>> > @@ -1377,16 +1387,12 @@ static void __init kho_release_scratch(void)
>> >
>> > void __init kho_memory_init(void)
>> > {
>> > - struct folio *folio;
>> > -
>> > if (kho_in.scratch_phys) {
>> > kho_scratch = phys_to_virt(kho_in.scratch_phys);
>> > kho_release_scratch();
>> >
>> > - kho_mem_deserialize(kho_get_fdt());
>> > - folio = kho_restore_folio(kho_in.fdt_phys);
>> > - if (!folio)
>> > - pr_warn("failed to restore folio for KHO fdt\n");
>> > + if (!kho_mem_deserialize(kho_get_fdt()))
>> > + kho_in.fdt_phys = 0;
>>
>> The folio restore does serve a purpose: it accounts for that folio in
>> the system's total memory. See the call to adjust_managed_page_count()
>> in kho_restore_page(). In practice, I don't think it makes much of a
>> difference, but I don't see why not.
>
> This page is never freed, so adding it to zone managed pages or keeping it
> reserved does not change anything.
In practice, sure. I still don't see a good reason to _not_ initialize
the page properly. It's not like it costs us much in terms of
performance or code complexity.
Since kho_restore_folio() makes sure the folio was _actually_ preserved
from KHO, you have a safety check against previous kernel having a bug
and not preserving the FDT properly. And I get that the FDT has already
been used by this point, but at least you would have some known point to
catch this.
[...]
--
Regards,
Pratyush Yadav
> > This page is never freed, so adding it to zone managed pages or keeping it > > reserved does not change anything. > > In practice, sure. I still don't see a good reason to _not_ initialize > the page properly. It's not like it costs us much in terms of > performance or code complexity. > > Since kho_restore_folio() makes sure the folio was _actually_ preserved > from KHO, you have a safety check against previous kernel having a bug > and not preserving the FDT properly. And I get that the FDT has already > been used by this point, but at least you would have some known point to > catch this. The kho_alloc_preserve() API is different from kho_preserve_folio(). With kho_preserve_folio(), memory is allocated and some time later is preserved, so there is a possibility for that memory to exist and be used where it is not preserved, therefore it is a crucial step for such memory to also do kho_restore_folio() before used. With kho_alloc_preserve(), when the memory exists it is always preserved; it is gurantee of this API. There is no reason to do kho_restore_folio() on such memory at all. It can be released back to the system via kho_free_restore()/kho_free_unpreserve(). Pasha > > [...] > > -- > Regards, > Pratyush Yadav
On Tue, Nov 18 2025, Pasha Tatashin wrote: >> > This page is never freed, so adding it to zone managed pages or keeping it >> > reserved does not change anything. >> >> In practice, sure. I still don't see a good reason to _not_ initialize >> the page properly. It's not like it costs us much in terms of >> performance or code complexity. >> >> Since kho_restore_folio() makes sure the folio was _actually_ preserved >> from KHO, you have a safety check against previous kernel having a bug >> and not preserving the FDT properly. And I get that the FDT has already >> been used by this point, but at least you would have some known point to >> catch this. > > The kho_alloc_preserve() API is different from kho_preserve_folio(). > With kho_preserve_folio(), memory is allocated and some time later is > preserved, so there is a possibility for that memory to exist and be > used where it is not preserved, therefore it is a crucial step for > such memory to also do kho_restore_folio() before used. With > kho_alloc_preserve(), when the memory exists it is always preserved; > it is gurantee of this API. There is no reason to do > kho_restore_folio() on such memory at all. It can be released back to > the system via kho_free_restore()/kho_free_unpreserve(). Even for those I think there should be a kho_restore_mem() or something similar (naming things is hard :/), so they go through the restore, their struct page is properly initialized and accounted for, and make sure the pages were actually preserved. Using the memory without restoring it first should be the exception IMO. -- Regards, Pratyush Yadav
On Tue, Nov 18, 2025 at 06:11:24PM +0100, Pratyush Yadav wrote: > On Tue, Nov 18 2025, Pasha Tatashin wrote: > > >> > This page is never freed, so adding it to zone managed pages or keeping it > >> > reserved does not change anything. > >> > >> In practice, sure. I still don't see a good reason to _not_ initialize > >> the page properly. It's not like it costs us much in terms of > >> performance or code complexity. > >> > >> Since kho_restore_folio() makes sure the folio was _actually_ preserved > >> from KHO, you have a safety check against previous kernel having a bug > >> and not preserving the FDT properly. And I get that the FDT has already > >> been used by this point, but at least you would have some known point to > >> catch this. > > > > The kho_alloc_preserve() API is different from kho_preserve_folio(). > > With kho_preserve_folio(), memory is allocated and some time later is > > preserved, so there is a possibility for that memory to exist and be > > used where it is not preserved, therefore it is a crucial step for > > such memory to also do kho_restore_folio() before used. With > > kho_alloc_preserve(), when the memory exists it is always preserved; > > it is gurantee of this API. There is no reason to do > > kho_restore_folio() on such memory at all. It can be released back to > > the system via kho_free_restore()/kho_free_unpreserve(). > > Even for those I think there should be a kho_restore_mem() or something > similar (naming things is hard :/), so they go through the restore, > their struct page is properly initialized and accounted for, and > make sure the pages were actually preserved. > > Using the memory without restoring it first should be the exception IMO. Base KHO and LUO FTDs are such exceptions for sure :) We have to use them way before we can even think about restoring. > -- > Regards, > Pratyush Yadav -- Sincerely yours, Mike.
On Fri, Nov 14, 2025 at 11:52 AM Pratyush Yadav <pratyush@kernel.org> wrote:
>
> On Fri, Nov 14 2025, Pasha Tatashin wrote:
>
> > During boot, kho_restore_folio() relies on the memory map having been
> > successfully deserialized. If deserialization fails or no map is present,
> > attempting to restore the FDT folio is unsafe.
> >
> > Update kho_mem_deserialize() to return a boolean indicating success. Use
> > this return value in kho_memory_init() to disable KHO if deserialization
> > fails. Also, the incoming FDT folio is never used, there is no reason to
> > restore it.
> >
> > Additionally, use memcpy() to retrieve the memory map pointer from the FDT.
> > FDT properties are not guaranteed to be naturally aligned, and accessing
> > a 64-bit value via a pointer that is only 32-bit aligned can cause faults.
> >
> > Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
> > ---
> > kernel/liveupdate/kexec_handover.c | 32 ++++++++++++++++++------------
> > 1 file changed, 19 insertions(+), 13 deletions(-)
> >
> > diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> > index a4b33ca79246..83aca3b4af15 100644
> > --- a/kernel/liveupdate/kexec_handover.c
> > +++ b/kernel/liveupdate/kexec_handover.c
> > @@ -450,20 +450,28 @@ static void __init deserialize_bitmap(unsigned int order,
> > }
> > }
> >
> > -static void __init kho_mem_deserialize(const void *fdt)
> > +/* Return true if memory was deserizlied */
> > +static bool __init kho_mem_deserialize(const void *fdt)
> > {
> > struct khoser_mem_chunk *chunk;
> > - const phys_addr_t *mem;
> > + const void *mem_ptr;
> > + u64 mem;
> > int len;
> >
> > - mem = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
> > -
> > - if (!mem || len != sizeof(*mem)) {
> > + mem_ptr = fdt_getprop(fdt, 0, PROP_PRESERVED_MEMORY_MAP, &len);
> > + if (!mem_ptr || len != sizeof(u64)) {
> > pr_err("failed to get preserved memory bitmaps\n");
> > - return;
> > + return false;
> > }
> > + /* FDT guarantees 32-bit alignment, have to use memcpy */
> > + memcpy(&mem, mem_ptr, len);
>
> Perhaps get_unaligned(mem) would have been simpler?
Hm, it certainly more descriptive. I will see if I can use it.
>
> > +
> > + chunk = mem ? phys_to_virt(mem) : NULL;
> > +
> > + /* No preserved physical pages were passed, no deserialization */
> > + if (!chunk)
> > + return false;
>
> Should we disallow all kho_restore_{folio,pages}() calls too if this
> fails? Ideally those should never happen since kho_retrieve_subtree()
> will fail, so maybe as a debug aid?
Right, my thinking was that they should never happen, as they do not
have a way to know the location of folios to be restored. So,
preventing FDT access that we do does that.
>
> >
> > - chunk = *mem ? phys_to_virt(*mem) : NULL;
> > while (chunk) {
> > unsigned int i;
> >
> > @@ -472,6 +480,8 @@ static void __init kho_mem_deserialize(const void *fdt)
> > &chunk->bitmaps[i]);
> > chunk = KHOSER_LOAD_PTR(chunk->hdr.next);
> > }
> > +
> > + return true;
> > }
> >
> > /*
> > @@ -1377,16 +1387,12 @@ static void __init kho_release_scratch(void)
> >
> > void __init kho_memory_init(void)
> > {
> > - struct folio *folio;
> > -
> > if (kho_in.scratch_phys) {
> > kho_scratch = phys_to_virt(kho_in.scratch_phys);
> > kho_release_scratch();
> >
> > - kho_mem_deserialize(kho_get_fdt());
> > - folio = kho_restore_folio(kho_in.fdt_phys);
> > - if (!folio)
> > - pr_warn("failed to restore folio for KHO fdt\n");
> > + if (!kho_mem_deserialize(kho_get_fdt()))
> > + kho_in.fdt_phys = 0;
>
> The folio restore does serve a purpose: it accounts for that folio in
> the system's total memory. See the call to adjust_managed_page_count()
> in kho_restore_page(). In practice, I don't think it makes much of a
> difference, but I don't see why not.
>
> > } else {
> > kho_reserve_scratch();
> > }
>
> --
> Regards,
> Pratyush Yadav
© 2016 - 2026 Red Hat, Inc.