arch/x86/kernel/kexec-bzimage64.c | 46 +++++++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-)
The kexec_file_load syscall on x86 currently does not support passing
a device tree blob to the new kernel.
To add support for this, we copy the behavior of ARM64 and PowerPC and
copy the current boot's device tree blob for use in the new kernel. We
do this on x86 by passing the device tree blob as a setup_data entry in
accordance with the x86 boot protocol.
Signed-off-by: Brian Mak <makb@juniper.net>
---
arch/x86/kernel/kexec-bzimage64.c | 46 +++++++++++++++++++++++++++++--
1 file changed, 43 insertions(+), 3 deletions(-)
diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c
index 24a41f0e0cf1..c24536c25f98 100644
--- a/arch/x86/kernel/kexec-bzimage64.c
+++ b/arch/x86/kernel/kexec-bzimage64.c
@@ -16,6 +16,8 @@
#include <linux/kexec.h>
#include <linux/kernel.h>
#include <linux/mm.h>
+#include <linux/libfdt.h>
+#include <linux/of_fdt.h>
#include <linux/efi.h>
#include <linux/random.h>
@@ -212,6 +214,28 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr,
}
#endif /* CONFIG_EFI */
+#ifdef CONFIG_OF_FLATTREE
+static void setup_dtb(struct boot_params *params,
+ unsigned long params_load_addr,
+ unsigned int dtb_setup_data_offset)
+{
+ struct setup_data *sd = (void *)params + dtb_setup_data_offset;
+ unsigned long setup_data_phys, dtb_len;
+
+ dtb_len = fdt_totalsize(initial_boot_params);
+ sd->type = SETUP_DTB;
+ sd->len = dtb_len;
+
+ /* Carry over current boot DTB with setup_data */
+ memcpy(sd->data, initial_boot_params, dtb_len);
+
+ /* Add setup data */
+ setup_data_phys = params_load_addr + dtb_setup_data_offset;
+ sd->next = params->hdr.setup_data;
+ params->hdr.setup_data = setup_data_phys;
+}
+#endif /* CONFIG_OF_FLATTREE */
+
static void
setup_ima_state(const struct kimage *image, struct boot_params *params,
unsigned long params_load_addr,
@@ -336,6 +360,16 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params,
sizeof(struct efi_setup_data);
#endif
+#ifdef CONFIG_OF_FLATTREE
+ if (initial_boot_params) {
+ setup_dtb(params, params_load_addr, setup_data_offset);
+ setup_data_offset += sizeof(struct setup_data) +
+ fdt_totalsize(initial_boot_params);
+ } else {
+ pr_info("No DTB\n");
+ }
+#endif
+
if (IS_ENABLED(CONFIG_IMA_KEXEC)) {
/* Setup IMA log buffer state */
setup_ima_state(image, params, params_load_addr,
@@ -529,6 +563,12 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
sizeof(struct setup_data) +
RNG_SEED_LENGTH;
+#ifdef CONFIG_OF_FLATTREE
+ if (initial_boot_params)
+ kbuf.bufsz += sizeof(struct setup_data) +
+ fdt_totalsize(initial_boot_params);
+#endif
+
if (IS_ENABLED(CONFIG_IMA_KEXEC))
kbuf.bufsz += sizeof(struct setup_data) +
sizeof(struct ima_setup_data);
@@ -537,7 +577,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
kbuf.bufsz += sizeof(struct setup_data) +
sizeof(struct kho_data);
- params = kzalloc(kbuf.bufsz, GFP_KERNEL);
+ params = kvzalloc(kbuf.bufsz, GFP_KERNEL);
if (!params)
return ERR_PTR(-ENOMEM);
efi_map_offset = params_cmdline_sz;
@@ -647,7 +687,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel,
return ldata;
out_free_params:
- kfree(params);
+ kvfree(params);
return ERR_PTR(ret);
}
@@ -659,7 +699,7 @@ static int bzImage64_cleanup(void *loader_data)
if (!ldata)
return 0;
- kfree(ldata->bootparams_buf);
+ kvfree(ldata->bootparams_buf);
ldata->bootparams_buf = NULL;
return 0;
base-commit: d7b8f8e20813f0179d8ef519541a3527e7661d3a
--
2.25.1
On 07/29/25 at 11:21am, Brian Mak wrote: > The kexec_file_load syscall on x86 currently does not support passing > a device tree blob to the new kernel. > > To add support for this, we copy the behavior of ARM64 and PowerPC and > copy the current boot's device tree blob for use in the new kernel. We > do this on x86 by passing the device tree blob as a setup_data entry in > accordance with the x86 boot protocol. I see how, but no why. Why do we need to add DTB for x86?
On Jul 31, 2025, at 8:02 PM, Baoquan He <bhe@redhat.com> wrote: > On 07/29/25 at 11:21am, Brian Mak wrote: >> The kexec_file_load syscall on x86 currently does not support passing >> a device tree blob to the new kernel. >> >> To add support for this, we copy the behavior of ARM64 and PowerPC and >> copy the current boot's device tree blob for use in the new kernel. We >> do this on x86 by passing the device tree blob as a setup_data entry in >> accordance with the x86 boot protocol. > > I see how, but no why. Why do we need to add DTB for x86? Hi Baoquan, Thanks for your comment. Some embedded x86 systems, such as ours at Juniper, use device trees. Currently, the x86 kexec_file_load syscall has no support for passing the device tree to the new kernel, which is a problem for us since we use kexec for some software upgrade related features. Not passing a device tree to the new kernel would cause a boot failure here. Thanks, Brian
On 08/01/25 at 07:24pm, Brian Mak wrote: > On Jul 31, 2025, at 8:02 PM, Baoquan He <bhe@redhat.com> wrote: > > > On 07/29/25 at 11:21am, Brian Mak wrote: > >> The kexec_file_load syscall on x86 currently does not support passing > >> a device tree blob to the new kernel. > >> > >> To add support for this, we copy the behavior of ARM64 and PowerPC and > >> copy the current boot's device tree blob for use in the new kernel. We > >> do this on x86 by passing the device tree blob as a setup_data entry in > >> accordance with the x86 boot protocol. > > > > I see how, but no why. Why do we need to add DTB for x86? > > Hi Baoquan, > > Thanks for your comment. Some embedded x86 systems, such as ours at > Juniper, use device trees. Currently, the x86 kexec_file_load syscall > has no support for passing the device tree to the new kernel, which is a > problem for us since we use kexec for some software upgrade related > features. Not passing a device tree to the new kernel would cause a boot > failure here. Thanks for the info. Please add these into patch log to let reviewers know them. We don't add code w/o objective.
On Aug 2, 2025, at 9:05 PM, Baoquan He <bhe@redhat.com> wrote: > Thanks for the info. Please add these into patch log to let reviewers > know them. We don't add code w/o objective. Hi Baoquan and Dave, I've raised a v2 patch with the requested changes: https://lore.kernel.org/lkml/20250805211527.122367-1-makb@juniper.net/T/#u Thanks, Brian
On Tue, Jul 29, 2025 at 11:21:42AM -0700, Brian Mak wrote: > The kexec_file_load syscall on x86 currently does not support passing > a device tree blob to the new kernel. > > To add support for this, we copy the behavior of ARM64 and PowerPC and > copy the current boot's device tree blob for use in the new kernel. We > do this on x86 by passing the device tree blob as a setup_data entry in > accordance with the x86 boot protocol. > > Signed-off-by: Brian Mak <makb@juniper.net> > --- > arch/x86/kernel/kexec-bzimage64.c | 46 +++++++++++++++++++++++++++++-- > 1 file changed, 43 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kernel/kexec-bzimage64.c b/arch/x86/kernel/kexec-bzimage64.c > index 24a41f0e0cf1..c24536c25f98 100644 > --- a/arch/x86/kernel/kexec-bzimage64.c > +++ b/arch/x86/kernel/kexec-bzimage64.c > @@ -16,6 +16,8 @@ > #include <linux/kexec.h> > #include <linux/kernel.h> > #include <linux/mm.h> > +#include <linux/libfdt.h> > +#include <linux/of_fdt.h> > #include <linux/efi.h> > #include <linux/random.h> > > @@ -212,6 +214,28 @@ setup_efi_state(struct boot_params *params, unsigned long params_load_addr, > } > #endif /* CONFIG_EFI */ > > +#ifdef CONFIG_OF_FLATTREE > +static void setup_dtb(struct boot_params *params, > + unsigned long params_load_addr, > + unsigned int dtb_setup_data_offset) > +{ > + struct setup_data *sd = (void *)params + dtb_setup_data_offset; > + unsigned long setup_data_phys, dtb_len; > + > + dtb_len = fdt_totalsize(initial_boot_params); > + sd->type = SETUP_DTB; > + sd->len = dtb_len; > + > + /* Carry over current boot DTB with setup_data */ > + memcpy(sd->data, initial_boot_params, dtb_len); > + > + /* Add setup data */ > + setup_data_phys = params_load_addr + dtb_setup_data_offset; > + sd->next = params->hdr.setup_data; > + params->hdr.setup_data = setup_data_phys; > +} > +#endif /* CONFIG_OF_FLATTREE */ > + > static void > setup_ima_state(const struct kimage *image, struct boot_params *params, > unsigned long params_load_addr, > @@ -336,6 +360,16 @@ setup_boot_parameters(struct kimage *image, struct boot_params *params, > sizeof(struct efi_setup_data); > #endif > > +#ifdef CONFIG_OF_FLATTREE > + if (initial_boot_params) { > + setup_dtb(params, params_load_addr, setup_data_offset); > + setup_data_offset += sizeof(struct setup_data) + > + fdt_totalsize(initial_boot_params); I suppose current boot dtb should be valid for the current runnning kernel, if you use kexec to load another kernel the next kexec reboot could fail due to unmatching dtb. Make this unconditionally could break the previous working kexec? > + } else { > + pr_info("No DTB\n"); pr_debug sounds better. > + } > +#endif > + > if (IS_ENABLED(CONFIG_IMA_KEXEC)) { > /* Setup IMA log buffer state */ > setup_ima_state(image, params, params_load_addr, > @@ -529,6 +563,12 @@ static void *bzImage64_load(struct kimage *image, char *kernel, > sizeof(struct setup_data) + > RNG_SEED_LENGTH; > > +#ifdef CONFIG_OF_FLATTREE > + if (initial_boot_params) > + kbuf.bufsz += sizeof(struct setup_data) + > + fdt_totalsize(initial_boot_params); > +#endif > + > if (IS_ENABLED(CONFIG_IMA_KEXEC)) > kbuf.bufsz += sizeof(struct setup_data) + > sizeof(struct ima_setup_data); > @@ -537,7 +577,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, > kbuf.bufsz += sizeof(struct setup_data) + > sizeof(struct kho_data); > > - params = kzalloc(kbuf.bufsz, GFP_KERNEL); > + params = kvzalloc(kbuf.bufsz, GFP_KERNEL); > if (!params) > return ERR_PTR(-ENOMEM); > efi_map_offset = params_cmdline_sz; > @@ -647,7 +687,7 @@ static void *bzImage64_load(struct kimage *image, char *kernel, > return ldata; > > out_free_params: > - kfree(params); > + kvfree(params); > return ERR_PTR(ret); > } > > @@ -659,7 +699,7 @@ static int bzImage64_cleanup(void *loader_data) > if (!ldata) > return 0; > > - kfree(ldata->bootparams_buf); > + kvfree(ldata->bootparams_buf); > ldata->bootparams_buf = NULL; > > return 0; > > base-commit: d7b8f8e20813f0179d8ef519541a3527e7661d3a > -- > 2.25.1 > Thanks Dave
On Jul 30, 2025, at 12:31 AM, Dave Young <dyoung@redhat.com> wrote >> +#ifdef CONFIG_OF_FLATTREE >> + if (initial_boot_params) { >> + setup_dtb(params, params_load_addr, setup_data_offset); >> + setup_data_offset += sizeof(struct setup_data) + >> + fdt_totalsize(initial_boot_params); > > I suppose current boot dtb should be valid for the current runnning > kernel, if you use kexec to load another kernel the next kexec reboot > could fail due to unmatching dtb. > > Make this unconditionally could break the previous working kexec? Hi Dave, Thanks for taking the time to look at this change! The behavior on ARM64 for carrying over the current boot DTB is unconditional, which is why I've made it unconditional here as well. I'm open to suggestions on this though. Realistically, would there be a case where having no DTB wouldn't break, but carrying over the current DTB would? > >> + } else { >> + pr_info("No DTB\n"); > > pr_debug sounds better. Agreed. I'll raise it in a v2 in a few days if there's no other comments. Thanks, Brian
Hi Brian, On Thu, 31 Jul 2025 at 01:02, Brian Mak <makb@juniper.net> wrote: > > On Jul 30, 2025, at 12:31 AM, Dave Young <dyoung@redhat.com> wrote > > >> +#ifdef CONFIG_OF_FLATTREE > >> + if (initial_boot_params) { > >> + setup_dtb(params, params_load_addr, setup_data_offset); > >> + setup_data_offset += sizeof(struct setup_data) + > >> + fdt_totalsize(initial_boot_params); > > > > I suppose current boot dtb should be valid for the current runnning > > kernel, if you use kexec to load another kernel the next kexec reboot > > could fail due to unmatching dtb. > > > > Make this unconditionally could break the previous working kexec? > > Hi Dave, > > Thanks for taking the time to look at this change! > > The behavior on ARM64 for carrying over the current boot DTB is > unconditional, which is why I've made it unconditional here as well. I'm > open to suggestions on this though. Realistically, would there be a case > where having no DTB wouldn't break, but carrying over the current DTB > would? I worry about it since dtb is for providing boot related information, weird things could happen if the kernel versions are different. About arm64 and powerpc I think maybe just nobody noticed this problem. IMO it is wrong as kexec is designed to load different kernels not limited to current running kernels. Otherwise the current kexec_file_load syscall only supports passing kernel_fd, initrd_fd and cmdline, no extra param designed for dtb, I don't know if there are other ways via attaching dtb to kernel or initrd. Probably you can try to add a new flag to the kexec_file_load syscall, when this flag is set 1 then use the current dtb, otherwise do nothing. For arm64 and power, ideally doing the same is better, but I'm not sure if we should change the old behavior, maybe they can go with default to load the dtb, but users can choose not to do that. Thanks Dave
© 2016 - 2025 Red Hat, Inc.