The scratch memory for kexec handover is used to bootstrap the
kexec'ed kernel. Only the 1st 1MB is used as scratch, and it's a
hack to get around limitations with KHO. It is only needed when
CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
(both checked by is_kho_boot()). Add a check to prevent marking a KHO
scratch region unless needed.
Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
Reported-by: Vlad Poenaru <thevlad@meta.com>
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
---
mm/memblock.c | 14 +++++++++-----
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/mm/memblock.c b/mm/memblock.c
index 8b13d5c28922a..913cf322eb89a 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -20,8 +20,8 @@
#ifdef CONFIG_KEXEC_HANDOVER
#include <linux/libfdt.h>
-#include <linux/kexec_handover.h>
#endif /* CONFIG_KEXEC_HANDOVER */
+#include <linux/kexec_handover.h>
#include <asm/sections.h>
#include <linux/io.h>
@@ -1126,8 +1126,10 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
*/
__init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
{
- return memblock_setclr_flag(&memblock.memory, base, size, 1,
- MEMBLOCK_KHO_SCRATCH);
+ if (is_kho_boot())
+ return memblock_setclr_flag(&memblock.memory, base, size, 1,
+ MEMBLOCK_KHO_SCRATCH);
+ return 0;
}
/**
@@ -1140,8 +1142,10 @@ __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
*/
__init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
{
- return memblock_setclr_flag(&memblock.memory, base, size, 0,
- MEMBLOCK_KHO_SCRATCH);
+ if (is_kho_boot())
+ return memblock_setclr_flag(&memblock.memory, base, size, 0,
+ MEMBLOCK_KHO_SCRATCH);
+ return 0;
}
static bool should_skip_region(struct memblock_type *type,
--
2.47.3
On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
> The scratch memory for kexec handover is used to bootstrap the
> kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
> hack to get around limitations with KHO. It is only needed when
> CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
> (both checked by is_kho_boot). Add check to prevent marking a KHO
> scratch region unless needed.
I'm going to rewrite the changelog and queue this for upstream:
The scratch memory for kexec handover is used to bootstrap the kexec'ed
kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
handover data passed from the previous kernel.
Currently x86 marks the first megabyte of memory as KHO scratch even for
non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
Add a check to prevent marking KHO scratch regions unless they are actually
needed.
> Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
> Reported-by: Vlad Poenaru <thevlad@meta.com>
> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
> ---
> mm/memblock.c | 14 +++++++++-----
> 1 file changed, 9 insertions(+), 5 deletions(-)
>
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 8b13d5c28922a..913cf322eb89a 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -20,8 +20,8 @@
>
> #ifdef CONFIG_KEXEC_HANDOVER
> #include <linux/libfdt.h>
> -#include <linux/kexec_handover.h>
> #endif /* CONFIG_KEXEC_HANDOVER */
> +#include <linux/kexec_handover.h>
>
> #include <asm/sections.h>
> #include <linux/io.h>
> @@ -1126,8 +1126,10 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
> */
> __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
> {
> - return memblock_setclr_flag(&memblock.memory, base, size, 1,
> - MEMBLOCK_KHO_SCRATCH);
> + if (is_kho_boot())
> + return memblock_setclr_flag(&memblock.memory, base, size, 1,
> + MEMBLOCK_KHO_SCRATCH);
> + return 0;
> }
>
> /**
> @@ -1140,8 +1142,10 @@ __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
> */
> __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
> {
> - return memblock_setclr_flag(&memblock.memory, base, size, 0,
> - MEMBLOCK_KHO_SCRATCH);
> + if (is_kho_boot())
> + return memblock_setclr_flag(&memblock.memory, base, size, 0,
> + MEMBLOCK_KHO_SCRATCH);
> + return 0;
> }
>
> static bool should_skip_region(struct memblock_type *type,
> --
> 2.47.3
>
--
Sincerely yours,
Mike.
On Sun, Nov 30, 2025 at 3:52 AM Mike Rapoport <rppt@kernel.org> wrote:
>
> On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
> > The scratch memory for kexec handover is used to bootstrap the
> > kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
> > hack to get around limitations with KHO. It is only needed when
> > CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
> > (both checked by is_kho_boot). Add check to prevent marking a KHO
> > scratch region unless needed.
>
> I'm going to rewrite the changelog and queue this for upstream:
>
> The scratch memory for kexec handover is used to bootstrap the kexec'ed
> kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
> handover data passed from the previous kernel.
>
> Currently x86 marks the first megabyte of memory as KHO scratch even for
> non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
>
> Add check to prevent marking a KHO scratch regions unless they are actually
> needed.
>
> > Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
> > Reported-by: Vlad Poenaru <thevlad@meta.com>
> > Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> > Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
This patch causes a panic with my tests in linux-next.
[ 0.000000] Kernel panic - not syncing: Cannot allocate 17280 bytes
for node 0 data
[ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted
6.18.0-next-20251203 #2 PREEMPT(undef)
[ 0.000000] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
BIOS 0.1 11/11/2019
[ 0.000000] Call Trace:
[ 0.000000] <TASK>
[ 0.000000] ? dump_stack_lvl+0x4e/0x70
[ 0.000000] ? vpanic+0xcf/0x2b0
[ 0.000000] ? panic+0x66/0x66
[ 0.000000] ? alloc_node_data+0x32/0x90
[ 0.000000] ? numa_register_nodes+0x82/0x100
[ 0.000000] ? numa_init+0x36/0x120
[ 0.000000] ? setup_arch+0x667/0x7f0
[ 0.000000] ? start_kernel+0x58/0x640
[ 0.000000] ? x86_64_start_reservations+0x24/0x30
[ 0.000000] ? x86_64_start_kernel+0xc5/0xd0
[ 0.000000] ? common_startup_64+0x13e/0x148
[ 0.000000] </TASK>
[ 0.000000] ---[ end Kernel panic - not syncing: Cannot allocate
17280 bytes for node 0 data ]---
PANIC: early exception 0x0d IP 10:ffffffff89007a13 error 763 cr2
0xffff991090a01000
> > ---
> > mm/memblock.c | 14 +++++++++-----
> > 1 file changed, 9 insertions(+), 5 deletions(-)
> >
> > diff --git a/mm/memblock.c b/mm/memblock.c
> > index 8b13d5c28922a..913cf322eb89a 100644
> > --- a/mm/memblock.c
> > +++ b/mm/memblock.c
> > @@ -20,8 +20,8 @@
> >
> > #ifdef CONFIG_KEXEC_HANDOVER
> > #include <linux/libfdt.h>
> > -#include <linux/kexec_handover.h>
> > #endif /* CONFIG_KEXEC_HANDOVER */
> > +#include <linux/kexec_handover.h>
> >
> > #include <asm/sections.h>
> > #include <linux/io.h>
> > @@ -1126,8 +1126,10 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
> > */
> > __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
> > {
> > - return memblock_setclr_flag(&memblock.memory, base, size, 1,
> > - MEMBLOCK_KHO_SCRATCH);
> > + if (is_kho_boot())
Looks like memblock_mark_kho_scratch() is called before is_kho_boot()
is working:
[ 0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
[ 0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
[ 0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
[ 0.000000] KHO: kho_populate: is_kho_boot: 1
[ 0.000000] memblock_mark_kho_scratch: is_kho_boot: 1
[ 0.000000] memblock_clear_kho_scratch: is_kho_boot: 1
...
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1514,6 +1514,7 @@ void __init kho_populate(phys_addr_t fdt_phys,
u64 fdt_len,
memblock_set_kho_scratch_only();
kho_in.fdt_phys = fdt_phys;
+ pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
kho_in.scratch_phys = scratch_phys;
kho_scratch_cnt = scratch_cnt;
pr_info("found kexec handover data.\n");
diff --git a/mm/memblock.c b/mm/memblock.c
index 87e7495a68c1..f55b5cdba5dd 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1126,6 +1126,7 @@ int __init_memblock
memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
*/
__init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
{
+ pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
if (is_kho_boot())
return memblock_setclr_flag(&memblock.memory, base, size, 1,
MEMBLOCK_KHO_SCRATCH);
@@ -1142,6 +1143,7 @@ __init int memblock_mark_kho_scratch(phys_addr_t
base, phys_addr_t size)
*/
__init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
{
+ pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
if (is_kho_boot())
return memblock_setclr_flag(&memblock.memory, base, size, 0,
MEMBLOCK_KHO_SCRATCH);
> > + return memblock_setclr_flag(&memblock.memory, base, size, 1,
> > + MEMBLOCK_KHO_SCRATCH);
> > + return 0;
> > }
> >
> > /**
> > @@ -1140,8 +1142,10 @@ __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
> > */
> > __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
> > {
> > - return memblock_setclr_flag(&memblock.memory, base, size, 0,
> > - MEMBLOCK_KHO_SCRATCH);
> > + if (is_kho_boot())
> > + return memblock_setclr_flag(&memblock.memory, base, size, 0,
> > + MEMBLOCK_KHO_SCRATCH);
> > + return 0;
> > }
> >
> > static bool should_skip_region(struct memblock_type *type,
> > --
> > 2.47.3
> >
>
> --
> Sincerely yours,
> Mike.
>
On 04/12/2025 14:04, Pasha Tatashin wrote:
> On Sun, Nov 30, 2025 at 3:52 AM Mike Rapoport <rppt@kernel.org> wrote:
>>
>> On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
>>> The scratch memory for kexec handover is used to bootstrap the
>>> kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
>>> hack to get around limitations with KHO. It is only needed when
>>> CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
>>> (both checked by is_kho_boot). Add check to prevent marking a KHO
>>> scratch region unless needed.
>>
>> I'm going to rewrite the changelog and queue this for upstream:
>>
>> The scratch memory for kexec handover is used to bootstrap the kexec'ed
>> kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
>> handover data passed from the previous kernel.
>>
>> Currently x86 marks the first megabyte of memory as KHO scratch even for
>> non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
>>
>> Add check to prevent marking a KHO scratch regions unless they are actually
>> needed.
>>
>>> Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
>>> Reported-by: Vlad Poenaru <thevlad@meta.com>
>>> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
>>> Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
>
> This patch causes panic with my tests in linux-next.
>
> [ 0.000000] Kernel panic - not syncing: Cannot allocate 17280 bytes
> for node 0 data
> [ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted
> 6.18.0-next-20251203 #2 PREEMPT(undef)
> [ 0.000000] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> BIOS 0.1 11/11/2019
> [ 0.000000] Call Trace:
> [ 0.000000] <TASK>
> [ 0.000000] ? dump_stack_lvl+0x4e/0x70
> [ 0.000000] ? vpanic+0xcf/0x2b0
> [ 0.000000] ? panic+0x66/0x66
> [ 0.000000] ? alloc_node_data+0x32/0x90
> [ 0.000000] ? numa_register_nodes+0x82/0x100
> [ 0.000000] ? numa_init+0x36/0x120
> [ 0.000000] ? setup_arch+0x667/0x7f0
> [ 0.000000] ? start_kernel+0x58/0x640
> [ 0.000000] ? x86_64_start_reservations+0x24/0x30
> [ 0.000000] ? x86_64_start_kernel+0xc5/0xd0
> [ 0.000000] ? common_startup_64+0x13e/0x148
> [ 0.000000] </TASK>
> [ 0.000000] ---[ end Kernel panic - not syncing: Cannot allocate
> 17280 bytes for node 0 data ]---
> PANIC: early exception 0x0d IP 10:ffffffff89007a13 error 763 cr2
> 0xffff991090a01000
>
Thanks for reporting this and sorry for the bug!
So the patch was designed to skip the memblock_mark_kho_scratch() call in e820__memblock_setup()
when not in a KHO boot. But it also broke the memblock_mark_kho_scratch() calls in kho_populate(),
because is_kho_boot() still returns false there until kho_in.fdt_phys is set.
Moving kho_in.fdt_phys = fdt_phys to before the memblock_mark_kho_scratch() calls
should fix it. I don't have a setup where I can easily test KHO, but I think the diff below
should fix it?
TBH using fdt_phys to check if the boot is KHO might be a bit hacky? Is it possible
to have a better check for this?
diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
index 9dc51fab604f1..c331749e6452e 100644
--- a/kernel/liveupdate/kexec_handover.c
+++ b/kernel/liveupdate/kexec_handover.c
@@ -1483,6 +1483,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
goto out;
}
+ kho_in.fdt_phys = fdt_phys;
/*
* We pass a safe contiguous blocks of memory to use for early boot
* purporses from the previous kernel so that we can resize the
@@ -1513,7 +1514,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
*/
memblock_set_kho_scratch_only();
- kho_in.fdt_phys = fdt_phys;
kho_in.scratch_phys = scratch_phys;
kho_scratch_cnt = scratch_cnt;
pr_info("found kexec handover data.\n");
@@ -1524,7 +1524,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
if (scratch)
early_memunmap(scratch, scratch_len);
if (err)
+ {
+ kho_in.fdt_phys = 0;
pr_warn("disabling KHO revival: %d\n", err);
+ }
}
>
>>> ---
>>> mm/memblock.c | 14 +++++++++-----
>>> 1 file changed, 9 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/mm/memblock.c b/mm/memblock.c
>>> index 8b13d5c28922a..913cf322eb89a 100644
>>> --- a/mm/memblock.c
>>> +++ b/mm/memblock.c
>>> @@ -20,8 +20,8 @@
>>>
>>> #ifdef CONFIG_KEXEC_HANDOVER
>>> #include <linux/libfdt.h>
>>> -#include <linux/kexec_handover.h>
>>> #endif /* CONFIG_KEXEC_HANDOVER */
>>> +#include <linux/kexec_handover.h>
>>>
>>> #include <asm/sections.h>
>>> #include <linux/io.h>
>>> @@ -1126,8 +1126,10 @@ int __init_memblock memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
>>> */
>>> __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
>>> {
>>> - return memblock_setclr_flag(&memblock.memory, base, size, 1,
>>> - MEMBLOCK_KHO_SCRATCH);
>>> + if (is_kho_boot())
>
> Looks like memblock_mark_kho_scratch() is called before is_kho_boot()
> is working:
>
> [ 0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
> [ 0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
> [ 0.000000] memblock_mark_kho_scratch: is_kho_boot: 0
> [ 0.000000] KHO: kho_populate: is_kho_boot: 1
> [ 0.000000] memblock_mark_kho_scratch: is_kho_boot: 1
> [ 0.000000] memblock_clear_kho_scratch: is_kho_boot: 1
> ...
>
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -1514,6 +1514,7 @@ void __init kho_populate(phys_addr_t fdt_phys,
> u64 fdt_len,
> memblock_set_kho_scratch_only();
>
> kho_in.fdt_phys = fdt_phys;
> + pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
> kho_in.scratch_phys = scratch_phys;
> kho_scratch_cnt = scratch_cnt;
> pr_info("found kexec handover data.\n");
> diff --git a/mm/memblock.c b/mm/memblock.c
> index 87e7495a68c1..f55b5cdba5dd 100644
> --- a/mm/memblock.c
> +++ b/mm/memblock.c
> @@ -1126,6 +1126,7 @@ int __init_memblock
> memblock_reserved_mark_noinit(phys_addr_t base, phys_addr_t
> */
> __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
> {
> + pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
> if (is_kho_boot())
> return memblock_setclr_flag(&memblock.memory, base, size, 1,
> MEMBLOCK_KHO_SCRATCH);
> @@ -1142,6 +1143,7 @@ __init int memblock_mark_kho_scratch(phys_addr_t
> base, phys_addr_t size)
> */
> __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
> {
> + pr_err("%s: is_kho_boot: %d\n", __func__, is_kho_boot());
> if (is_kho_boot())
> return memblock_setclr_flag(&memblock.memory, base, size, 0,
> MEMBLOCK_KHO_SCRATCH);
>
>>> + return memblock_setclr_flag(&memblock.memory, base, size, 1,
>>> + MEMBLOCK_KHO_SCRATCH);
>>> + return 0;
>>> }
>>>
>>> /**
>>> @@ -1140,8 +1142,10 @@ __init int memblock_mark_kho_scratch(phys_addr_t base, phys_addr_t size)
>>> */
>>> __init int memblock_clear_kho_scratch(phys_addr_t base, phys_addr_t size)
>>> {
>>> - return memblock_setclr_flag(&memblock.memory, base, size, 0,
>>> - MEMBLOCK_KHO_SCRATCH);
>>> + if (is_kho_boot())
>>> + return memblock_setclr_flag(&memblock.memory, base, size, 0,
>>> + MEMBLOCK_KHO_SCRATCH);
>>> + return 0;
>>> }
>>>
>>> static bool should_skip_region(struct memblock_type *type,
>>> --
>>> 2.47.3
>>>
>>
>> --
>> Sincerely yours,
>> Mike.
>>
Hi Usama,
On Thu, Dec 04, 2025 at 02:51:00PM +0000, Usama Arif wrote:
> > On Sun, Nov 30, 2025 at 3:52 AM Mike Rapoport <rppt@kernel.org> wrote:
> >>
> >> On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
> >>> The scratch memory for kexec handover is used to bootstrap the
> >>> kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
> >>> hack to get around limitations with KHO. It is only needed when
> >>> CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
> >>> (both checked by is_kho_boot). Add check to prevent marking a KHO
> >>> scratch region unless needed.
> >>
> >> I'm going to rewrite the changelog and queue this for upstream:
> >>
> >> The scratch memory for kexec handover is used to bootstrap the kexec'ed
> >> kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
> >> handover data passed from the previous kernel.
> >>
> >> Currently x86 marks the first megabyte of memory as KHO scratch even for
> >> non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
> >>
> >> Add check to prevent marking a KHO scratch regions unless they are actually
> >> needed.
> >>
> >>> Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
> >>> Reported-by: Vlad Poenaru <thevlad@meta.com>
> >>> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> >>> Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
> >
> > This patch causes panic with my tests in linux-next.
> >
> > [ 0.000000] Kernel panic - not syncing: Cannot allocate 17280 bytes
> > for node 0 data
> > [ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted
> > 6.18.0-next-20251203 #2 PREEMPT(undef)
> > [ 0.000000] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
> > BIOS 0.1 11/11/2019
> > [ 0.000000] Call Trace:
> > [ 0.000000] <TASK>
> > [ 0.000000] ? dump_stack_lvl+0x4e/0x70
> > [ 0.000000] ? vpanic+0xcf/0x2b0
> > [ 0.000000] ? panic+0x66/0x66
> > [ 0.000000] ? alloc_node_data+0x32/0x90
> > [ 0.000000] ? numa_register_nodes+0x82/0x100
> > [ 0.000000] ? numa_init+0x36/0x120
> > [ 0.000000] ? setup_arch+0x667/0x7f0
> > [ 0.000000] ? start_kernel+0x58/0x640
> > [ 0.000000] ? x86_64_start_reservations+0x24/0x30
> > [ 0.000000] ? x86_64_start_kernel+0xc5/0xd0
> > [ 0.000000] ? common_startup_64+0x13e/0x148
> > [ 0.000000] </TASK>
> > [ 0.000000] ---[ end Kernel panic - not syncing: Cannot allocate
> > 17280 bytes for node 0 data ]---
> > PANIC: early exception 0x0d IP 10:ffffffff89007a13 error 763 cr2
> > 0xffff991090a01000
> >
>
> Thanks for reporting this and sorry for the bug!
>
> So the patch was designed to remove the memblock_mark_kho_scratch in e820__memblock_setup if not
> in KHO boot. But it broke memblock_mark_kho_scratch in kho_populate.
> Moving kho_in.fdt_phys = fdt_phys to before the memblock_mark_scratch
> should fix it. I dont have a setup where I can easily test KHO, but I think below
> should fix it?
This might, but this is too late for v6.19-rc1.
For now I'm dropping this series from memblock/for-next.
We can resume working on this after merge window closes.
> TBH using fdt_phys to check if the boot is KHO might be a bit hacky? Is it possible
> to have a better check for this?
Presence of KHO FDT is a clear indication that it is a KHO boot.
The issue is that during early boot ordering is hard and it's not always
clear in which order features and configuration are detected and used.
> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> index 9dc51fab604f1..c331749e6452e 100644
> --- a/kernel/liveupdate/kexec_handover.c
> +++ b/kernel/liveupdate/kexec_handover.c
> @@ -1483,6 +1483,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> goto out;
> }
>
> + kho_in.fdt_phys = fdt_phys;
> /*
> * We pass a safe contiguous blocks of memory to use for early boot
> * purporses from the previous kernel so that we can resize the
> @@ -1513,7 +1514,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> */
> memblock_set_kho_scratch_only();
>
> - kho_in.fdt_phys = fdt_phys;
> kho_in.scratch_phys = scratch_phys;
> kho_scratch_cnt = scratch_cnt;
> pr_info("found kexec handover data.\n");
> @@ -1524,7 +1524,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> if (scratch)
> early_memunmap(scratch, scratch_len);
> if (err)
> + {
> + kho_in.fdt_phys = 0;
> pr_warn("disabling KHO revival: %d\n", err);
> + }
> }
--
Sincerely yours,
Mike.
On 04/12/2025 17:52, Mike Rapoport wrote:
> Hi Usama,
>
> On Thu, Dec 04, 2025 at 02:51:00PM +0000, Usama Arif wrote:
>>> On Sun, Nov 30, 2025 at 3:52 AM Mike Rapoport <rppt@kernel.org> wrote:
>>>>
>>>> On Fri, Nov 28, 2025 at 05:29:34PM +0000, Usama Arif wrote:
>>>>> The scratch memory for kexec handover is used to bootstrap the
>>>>> kexec'ed kernel. Only the 1st 1MB is used as scratch, and its a
>>>>> hack to get around limitations with KHO. It is only needed when
>>>>> CONFIG_KEXEC_HANDOVER is enabled and only if it is a KHO boot
>>>>> (both checked by is_kho_boot). Add check to prevent marking a KHO
>>>>> scratch region unless needed.
>>>>
>>>> I'm going to rewrite the changelog and queue this for upstream:
>>>>
>>>> The scratch memory for kexec handover is used to bootstrap the kexec'ed
>>>> kernel and it is only needed when it is a KHO boot, i.e. a kexec boot with
>>>> handover data passed from the previous kernel.
>>>>
>>>> Currently x86 marks the first megabyte of memory as KHO scratch even for
>>>> non-KHO boots if CONFIG_KEXEC_HANDOVER is enabled.
>>>>
>>>> Add check to prevent marking a KHO scratch regions unless they are actually
>>>> needed.
>>>>
>>>>> Fixes: a2daf83e10378 ("x86/e820: temporarily enable KHO scratch for memory below 1M")
>>>>> Reported-by: Vlad Poenaru <thevlad@meta.com>
>>>>> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
>>>>> Reviewed-by: Pratyush Yadav <pratyush@kernel.org>
>>>
>>> This patch causes panic with my tests in linux-next.
>>>
>>> [ 0.000000] Kernel panic - not syncing: Cannot allocate 17280 bytes
>>> for node 0 data
>>> [ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted
>>> 6.18.0-next-20251203 #2 PREEMPT(undef)
>>> [ 0.000000] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009),
>>> BIOS 0.1 11/11/2019
>>> [ 0.000000] Call Trace:
>>> [ 0.000000] <TASK>
>>> [ 0.000000] ? dump_stack_lvl+0x4e/0x70
>>> [ 0.000000] ? vpanic+0xcf/0x2b0
>>> [ 0.000000] ? panic+0x66/0x66
>>> [ 0.000000] ? alloc_node_data+0x32/0x90
>>> [ 0.000000] ? numa_register_nodes+0x82/0x100
>>> [ 0.000000] ? numa_init+0x36/0x120
>>> [ 0.000000] ? setup_arch+0x667/0x7f0
>>> [ 0.000000] ? start_kernel+0x58/0x640
>>> [ 0.000000] ? x86_64_start_reservations+0x24/0x30
>>> [ 0.000000] ? x86_64_start_kernel+0xc5/0xd0
>>> [ 0.000000] ? common_startup_64+0x13e/0x148
>>> [ 0.000000] </TASK>
>>> [ 0.000000] ---[ end Kernel panic - not syncing: Cannot allocate
>>> 17280 bytes for node 0 data ]---
>>> PANIC: early exception 0x0d IP 10:ffffffff89007a13 error 763 cr2
>>> 0xffff991090a01000
>>>
>>
>> Thanks for reporting this and sorry for the bug!
>>
>> So the patch was designed to remove the memblock_mark_kho_scratch in e820__memblock_setup if not
>> in KHO boot. But it broke memblock_mark_kho_scratch in kho_populate.
>> Moving kho_in.fdt_phys = fdt_phys to before the memblock_mark_scratch
>> should fix it. I dont have a setup where I can easily test KHO, but I think below
>> should fix it?
>
> This might, but this is too late for v6.19-rc1.
> For now I'm dropping this series from memblock/for-next.
> We can resume working on this after merge window closes.
>
Yes makes sense.
How would you like me to proceed with the fix? Should I send just the fix now,
or these 2 patches plus the fix after the merge window closes?
Thanks!
>> TBH using fdt_phys to check if the boot is KHO might be a bit hacky? Is it possible
>> to have a better check for this?
>
> Presence of KHO FDT is a clear indication that it is a KHO boot.
> The issue is that during early boot ordering is hard and it's not always
> clear in which order features and configuration are detected and used.
>
ack
>> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
>> index 9dc51fab604f1..c331749e6452e 100644
>> --- a/kernel/liveupdate/kexec_handover.c
>> +++ b/kernel/liveupdate/kexec_handover.c
>> @@ -1483,6 +1483,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>> goto out;
>> }
>>
>> + kho_in.fdt_phys = fdt_phys;
>> /*
>> * We pass a safe contiguous blocks of memory to use for early boot
>> * purporses from the previous kernel so that we can resize the
>> @@ -1513,7 +1514,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>> */
>> memblock_set_kho_scratch_only();
>>
>> - kho_in.fdt_phys = fdt_phys;
>> kho_in.scratch_phys = scratch_phys;
>> kho_scratch_cnt = scratch_cnt;
>> pr_info("found kexec handover data.\n");
>> @@ -1524,7 +1524,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
>> if (scratch)
>> early_memunmap(scratch, scratch_len);
>> if (err)
>> + {
>> + kho_in.fdt_phys = 0;
>> pr_warn("disabling KHO revival: %d\n", err);
>> + }
>> }
>
On Thu, Dec 04, 2025 at 07:27:29PM +0000, Usama Arif wrote:
> On 04/12/2025 17:52, Mike Rapoport wrote:
> >>
> >> So the patch was designed to remove the memblock_mark_kho_scratch in e820__memblock_setup if not
> >> in KHO boot. But it broke memblock_mark_kho_scratch in kho_populate.
> >> Moving kho_in.fdt_phys = fdt_phys to before the memblock_mark_scratch
> >> should fix it. I dont have a setup where I can easily test KHO, but I think below
> >> should fix it?
There's a simple KHO selftest in tools/testing/selftests/kho
> > This might, but this is too late for v6.19-rc1.
> > For now I'm dropping this series from memblock/for-next.
> > We can resume working on this after merge window closes.
> >
>
> Yes makes sense.
>
> How would you like me to proceed with the fix? Should I send just the fix now,
> or these 2 patches plus the fix after the merge window closes?
The fix should come before the changes in memblock_mark_kho_scratch(), so
please resend the whole series.
> >> diff --git a/kernel/liveupdate/kexec_handover.c b/kernel/liveupdate/kexec_handover.c
> >> index 9dc51fab604f1..c331749e6452e 100644
> >> --- a/kernel/liveupdate/kexec_handover.c
> >> +++ b/kernel/liveupdate/kexec_handover.c
> >> @@ -1483,6 +1483,7 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> >> goto out;
> >> }
> >>
> >> + kho_in.fdt_phys = fdt_phys;
This should happen before the calls to memblock_mark_kho_scratch().
> >> /*
> >> * We pass a safe contiguous blocks of memory to use for early boot
> >> * purporses from the previous kernel so that we can resize the
> >> @@ -1513,7 +1514,6 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> >> */
> >> memblock_set_kho_scratch_only();
> >>
> >> - kho_in.fdt_phys = fdt_phys;
> >> kho_in.scratch_phys = scratch_phys;
> >> kho_scratch_cnt = scratch_cnt;
> >> pr_info("found kexec handover data.\n");
> >> @@ -1524,7 +1524,10 @@ void __init kho_populate(phys_addr_t fdt_phys, u64 fdt_len,
> >> if (scratch)
> >> early_memunmap(scratch, scratch_len);
> >> if (err)
> >> + {
> >> + kho_in.fdt_phys = 0;
> >> pr_warn("disabling KHO revival: %d\n", err);
> >> + }
> >> }
> >
>
--
Sincerely yours,
Mike.
© 2016 - 2026 Red Hat, Inc.