arch/x86/include/asm/e820/api.h | 2 ++ arch/x86/kernel/e820.c | 6 ++++++ arch/x86/platform/efi/efi.c | 9 +++++++++ drivers/firmware/efi/tpm.c | 2 +- include/linux/efi.h | 7 +++++++ 5 files changed, 25 insertions(+), 1 deletion(-)
Looking at the TPM spec [1]
If the ACPI TPM2 table contains the address and size of the Platform
Firmware TCG log, firmware “pins” the memory associated with the
Platform FirmwareTCG log, and reports this memory as “Reserved” memory
via the INT 15h/E820 interface.
It looks like the firmware should pass this as reserved in e820 memory
map. However, it doesn't seem to. The firmware being tested on is:
dmidecode -s bios-version
edk2-20240214-2.el9
When this area is not reserved, it comes up as usable in
/sys/firmware/memmap. This means that kexec, which uses that memmap
to find usable memory regions, can select the region where efi.tpm_log
is and overwrite it and relocate_kernel.
Having a fix in firmware can be difficult to get through. As a secondary
fix, this patch marks that region as reserved in e820_table_firmware if it
is currently E820_TYPE_RAM so that kexec doesn't use it for kernel segments.
[1] https://trustedcomputinggroup.org/wp-content/uploads/PC-ClientPlatform_Profile_for_TPM_2p0_Systems_v49_161114_public-review.pdf
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
---
arch/x86/include/asm/e820/api.h | 2 ++
arch/x86/kernel/e820.c | 6 ++++++
arch/x86/platform/efi/efi.c | 9 +++++++++
drivers/firmware/efi/tpm.c | 2 +-
include/linux/efi.h | 7 +++++++
5 files changed, 25 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h
index 2e74a7f0e935..4e9aa24f03bd 100644
--- a/arch/x86/include/asm/e820/api.h
+++ b/arch/x86/include/asm/e820/api.h
@@ -16,6 +16,8 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type);
extern void e820__range_add (u64 start, u64 size, enum e820_type type);
extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
+extern u64 e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type,
+ enum e820_type new_type);
extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type);
extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type);
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 4893d30ce438..912400161623 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -538,6 +538,12 @@ u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size,
return __e820__range_update(t, start, size, old_type, new_type);
}
+u64 __init e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type,
+ enum e820_type new_type)
+{
+ return __e820__range_update(e820_table_firmware, start, size, old_type, new_type);
+}
+
/* Remove a range of memory from the E820 table: */
u64 __init e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type)
{
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 88a96816de9a..aa95f77d7a30 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -171,6 +171,15 @@ static void __init do_add_efi_memmap(void)
e820__update_table(e820_table);
}
+/* Reserve firmware area if it was marked as RAM */
+void arch_update_firmware_area(u64 addr, u64 size)
+{
+ if (e820__get_entry_type(addr, addr + size) == E820_TYPE_RAM) {
+ e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED);
+ e820__update_table(e820_table_firmware);
+ }
+}
+
/*
* Given add_efi_memmap defaults to 0 and there is no alternative
* e820 mechanism for soft-reserved memory, import the full EFI memory
diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c
index e8d69bd548f3..8e6e7131d718 100644
--- a/drivers/firmware/efi/tpm.c
+++ b/drivers/firmware/efi/tpm.c
@@ -60,6 +60,7 @@ int __init efi_tpm_eventlog_init(void)
}
tbl_size = sizeof(*log_tbl) + log_tbl->size;
+ arch_update_firmware_area(efi.tpm_log, tbl_size);
memblock_reserve(efi.tpm_log, tbl_size);
if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) {
@@ -107,4 +108,3 @@ int __init efi_tpm_eventlog_init(void)
early_memunmap(log_tbl, sizeof(*log_tbl));
return ret;
}
-
diff --git a/include/linux/efi.h b/include/linux/efi.h
index 6bf3c4fe8511..9c239cdff771 100644
--- a/include/linux/efi.h
+++ b/include/linux/efi.h
@@ -1371,4 +1371,11 @@ extern struct blocking_notifier_head efivar_ops_nh;
void efivars_generic_ops_register(void);
void efivars_generic_ops_unregister(void);
+#ifdef CONFIG_X86_64
+void __init arch_update_firmware_area(u64 addr, u64 size);
+#else
+static inline void __init arch_update_firmware_area(u64 addr, u64 size)
+{
+}
+#endif
#endif /* _LINUX_EFI_H */
--
2.43.5
On Wed, 11 Sept 2024 at 12:41, Usama Arif <usamaarif642@gmail.com> wrote: > > Looking at the TPM spec [1] > > If the ACPI TPM2 table contains the address and size of the Platform > Firmware TCG log, firmware “pins” the memory associated with the > Platform FirmwareTCG log, and reports this memory as “Reserved” memory > via the INT 15h/E820 interface. > > It looks like the firmware should pass this as reserved in e820 memory > map. However, it doesn't seem to. The firmware being tested on is: > dmidecode -s bios-version > edk2-20240214-2.el9 > > When this area is not reserved, it comes up as usable in > /sys/firmware/memmap. This means that kexec, which uses that memmap > to find usable memory regions, can select the region where efi.tpm_log > is and overwrite it and relocate_kernel. > > Having a fix in firmware can be difficult to get through. As a secondary > fix, this patch marks that region as reserved in e820_table_firmware if it > is currently E820_TYPE_RAM so that kexec doesn't use it for kernel segments. > > [1] https://trustedcomputinggroup.org/wp-content/uploads/PC-ClientPlatform_Profile_for_TPM_2p0_Systems_v49_161114_public-review.pdf > > Signed-off-by: Usama Arif <usamaarif642@gmail.com> I would expect the EFI memory map to E820 conversion implemented in the EFI stub to take care of this. If you are not booting via the EFI stub, the bootloader is performing this conversion, and so it should be done there instead. > --- > arch/x86/include/asm/e820/api.h | 2 ++ > arch/x86/kernel/e820.c | 6 ++++++ > arch/x86/platform/efi/efi.c | 9 +++++++++ > drivers/firmware/efi/tpm.c | 2 +- > include/linux/efi.h | 7 +++++++ > 5 files changed, 25 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h > index 2e74a7f0e935..4e9aa24f03bd 100644 > --- a/arch/x86/include/asm/e820/api.h > +++ b/arch/x86/include/asm/e820/api.h > @@ -16,6 +16,8 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); > > extern void e820__range_add (u64 start, u64 size, enum e820_type type); > extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); > +extern u64 e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, > + enum e820_type new_type); > extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type); > extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); > > diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c > index 4893d30ce438..912400161623 100644 > --- a/arch/x86/kernel/e820.c > +++ b/arch/x86/kernel/e820.c > @@ -538,6 +538,12 @@ u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size, > return __e820__range_update(t, start, size, old_type, new_type); > } > > +u64 __init e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, > + enum e820_type new_type) > +{ > + return __e820__range_update(e820_table_firmware, start, size, old_type, new_type); > +} > + > /* Remove a range of memory from the E820 table: */ > u64 __init e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type) > { > diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c > index 88a96816de9a..aa95f77d7a30 100644 > --- a/arch/x86/platform/efi/efi.c > +++ b/arch/x86/platform/efi/efi.c > @@ -171,6 +171,15 @@ static void __init do_add_efi_memmap(void) > e820__update_table(e820_table); > } > > +/* Reserve firmware area if it was marked as RAM */ > +void arch_update_firmware_area(u64 addr, u64 size) > +{ > + if (e820__get_entry_type(addr, addr + size) == E820_TYPE_RAM) { > + e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > + e820__update_table(e820_table_firmware); > + } > +} > + > /* > * Given add_efi_memmap defaults to 0 and there is no alternative > * e820 mechanism for soft-reserved memory, import the full EFI memory > diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c > index e8d69bd548f3..8e6e7131d718 100644 > --- a/drivers/firmware/efi/tpm.c > +++ b/drivers/firmware/efi/tpm.c > @@ -60,6 +60,7 @@ int __init efi_tpm_eventlog_init(void) > } > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > + arch_update_firmware_area(efi.tpm_log, tbl_size); > memblock_reserve(efi.tpm_log, tbl_size); > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > @@ -107,4 +108,3 @@ int __init efi_tpm_eventlog_init(void) > early_memunmap(log_tbl, sizeof(*log_tbl)); > return ret; > } > - > diff --git a/include/linux/efi.h b/include/linux/efi.h > index 6bf3c4fe8511..9c239cdff771 100644 > --- a/include/linux/efi.h > +++ b/include/linux/efi.h > @@ -1371,4 +1371,11 @@ extern struct blocking_notifier_head efivar_ops_nh; > void efivars_generic_ops_register(void); > void efivars_generic_ops_unregister(void); > > +#ifdef CONFIG_X86_64 > +void __init arch_update_firmware_area(u64 addr, u64 size); > +#else > +static inline void __init arch_update_firmware_area(u64 addr, u64 size) > +{ > +} > +#endif > #endif /* _LINUX_EFI_H */ > -- > 2.43.5 >
On 11/09/2024 12:51, Ard Biesheuvel wrote: > On Wed, 11 Sept 2024 at 12:41, Usama Arif <usamaarif642@gmail.com> wrote: >> >> Looking at the TPM spec [1] >> >> If the ACPI TPM2 table contains the address and size of the Platform >> Firmware TCG log, firmware “pins” the memory associated with the >> Platform FirmwareTCG log, and reports this memory as “Reserved” memory >> via the INT 15h/E820 interface. >> >> It looks like the firmware should pass this as reserved in e820 memory >> map. However, it doesn't seem to. The firmware being tested on is: >> dmidecode -s bios-version >> edk2-20240214-2.el9 >> >> When this area is not reserved, it comes up as usable in >> /sys/firmware/memmap. This means that kexec, which uses that memmap >> to find usable memory regions, can select the region where efi.tpm_log >> is and overwrite it and relocate_kernel. >> >> Having a fix in firmware can be difficult to get through. As a secondary >> fix, this patch marks that region as reserved in e820_table_firmware if it >> is currently E820_TYPE_RAM so that kexec doesn't use it for kernel segments. >> >> [1] https://trustedcomputinggroup.org/wp-content/uploads/PC-ClientPlatform_Profile_for_TPM_2p0_Systems_v49_161114_public-review.pdf >> >> Signed-off-by: Usama Arif <usamaarif642@gmail.com> > > I would expect the EFI memory map to E820 conversion implemented in > the EFI stub to take care of this. > So I have been making a prototype with EFI stub, and the unfinished version is looking like a horrible hack. The only way to do this in libstub is to pass log_tbl all the way from efi_retrieve_tcg2_eventlog to efi_stub_entry and from there to setup_e820. While going through the efi memory map and converting it to e820 table in setup_e820, you have to check if log_tbl falls in any of the ranges and if the range is E820_TYPE_RAM. If that condition is satisfied, then you have to split that range into 3. i.e. the E820_TYPE_RAM range before tpm_log, the tpm_log E820_TYPE_RESERVED range, and the E820_TYPE_RAM range after. There are no helper functions, so this splitting involves playing with a lot of pointers, and it looks quite ugly. I believe doing this way is more likely to introduce bugs. If we are having to compensate for an EFI bug, would it make sense to do it in the way done in RFC and do it in kernel rather than libstub? It is simple and very likely to be bug free. Thanks, Usama > If you are not booting via the EFI stub, the bootloader is performing > this conversion, and so it should be done there instead. > > >> --- >> arch/x86/include/asm/e820/api.h | 2 ++ >> arch/x86/kernel/e820.c | 6 ++++++ >> arch/x86/platform/efi/efi.c | 9 +++++++++ >> drivers/firmware/efi/tpm.c | 2 +- >> include/linux/efi.h | 7 +++++++ >> 5 files changed, 25 insertions(+), 1 deletion(-) >> >> diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h >> index 2e74a7f0e935..4e9aa24f03bd 100644 >> --- a/arch/x86/include/asm/e820/api.h >> +++ b/arch/x86/include/asm/e820/api.h >> @@ -16,6 +16,8 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); >> >> extern void e820__range_add (u64 start, u64 size, enum e820_type type); >> extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); >> +extern u64 e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, >> + enum e820_type new_type); >> extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type); >> extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); >> >> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c >> index 4893d30ce438..912400161623 100644 >> --- a/arch/x86/kernel/e820.c >> +++ b/arch/x86/kernel/e820.c >> @@ -538,6 +538,12 @@ u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size, >> return __e820__range_update(t, start, size, old_type, new_type); >> } >> >> +u64 __init e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, >> + enum e820_type new_type) >> +{ >> + return __e820__range_update(e820_table_firmware, start, size, old_type, new_type); >> +} >> + >> /* Remove a range of memory from the E820 table: */ >> u64 __init e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type) >> { >> diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c >> index 88a96816de9a..aa95f77d7a30 100644 >> --- a/arch/x86/platform/efi/efi.c >> +++ b/arch/x86/platform/efi/efi.c >> @@ -171,6 +171,15 @@ static void __init do_add_efi_memmap(void) >> e820__update_table(e820_table); >> } >> >> +/* Reserve firmware area if it was marked as RAM */ >> +void arch_update_firmware_area(u64 addr, u64 size) >> +{ >> + if (e820__get_entry_type(addr, addr + size) == E820_TYPE_RAM) { >> + e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); >> + e820__update_table(e820_table_firmware); >> + } >> +} >> + >> /* >> * Given add_efi_memmap defaults to 0 and there is no alternative >> * e820 mechanism for soft-reserved memory, import the full EFI memory >> diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c >> index e8d69bd548f3..8e6e7131d718 100644 >> --- a/drivers/firmware/efi/tpm.c >> +++ b/drivers/firmware/efi/tpm.c >> @@ -60,6 +60,7 @@ int __init efi_tpm_eventlog_init(void) >> } >> >> tbl_size = sizeof(*log_tbl) + log_tbl->size; >> + arch_update_firmware_area(efi.tpm_log, tbl_size); >> memblock_reserve(efi.tpm_log, tbl_size); >> >> if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { >> @@ -107,4 +108,3 @@ int __init efi_tpm_eventlog_init(void) >> early_memunmap(log_tbl, sizeof(*log_tbl)); >> return ret; >> } >> - >> diff --git a/include/linux/efi.h b/include/linux/efi.h >> index 6bf3c4fe8511..9c239cdff771 100644 >> --- a/include/linux/efi.h >> +++ b/include/linux/efi.h >> @@ -1371,4 +1371,11 @@ extern struct blocking_notifier_head efivar_ops_nh; >> void efivars_generic_ops_register(void); >> void efivars_generic_ops_unregister(void); >> >> +#ifdef CONFIG_X86_64 >> +void __init arch_update_firmware_area(u64 addr, u64 size); >> +#else >> +static inline void __init arch_update_firmware_area(u64 addr, u64 size) >> +{ >> +} >> +#endif >> #endif /* _LINUX_EFI_H */ >> -- >> 2.43.5 >>
On Thu, 12 Sept 2024 at 12:23, Usama Arif <usamaarif642@gmail.com> wrote: > > > > On 11/09/2024 12:51, Ard Biesheuvel wrote: > > On Wed, 11 Sept 2024 at 12:41, Usama Arif <usamaarif642@gmail.com> wrote: > >> > >> Looking at the TPM spec [1] > >> > >> If the ACPI TPM2 table contains the address and size of the Platform > >> Firmware TCG log, firmware “pins” the memory associated with the > >> Platform FirmwareTCG log, and reports this memory as “Reserved” memory > >> via the INT 15h/E820 interface. > >> > >> It looks like the firmware should pass this as reserved in e820 memory > >> map. However, it doesn't seem to. The firmware being tested on is: > >> dmidecode -s bios-version > >> edk2-20240214-2.el9 > >> > >> When this area is not reserved, it comes up as usable in > >> /sys/firmware/memmap. This means that kexec, which uses that memmap > >> to find usable memory regions, can select the region where efi.tpm_log > >> is and overwrite it and relocate_kernel. > >> > >> Having a fix in firmware can be difficult to get through. As a secondary > >> fix, this patch marks that region as reserved in e820_table_firmware if it > >> is currently E820_TYPE_RAM so that kexec doesn't use it for kernel segments. > >> > >> [1] https://trustedcomputinggroup.org/wp-content/uploads/PC-ClientPlatform_Profile_for_TPM_2p0_Systems_v49_161114_public-review.pdf > >> > >> Signed-off-by: Usama Arif <usamaarif642@gmail.com> > > > > I would expect the EFI memory map to E820 conversion implemented in > > the EFI stub to take care of this. > > > > So I have been making a prototype with EFI stub, and the unfinished version is looking like a > horrible hack. > > The only way to do this in libstub is to pass log_tbl all the way from efi_retrieve_tcg2_eventlog > to efi_stub_entry and from there to setup_e820. > While going through the efi memory map and converting it to e820 table in setup_e820, you have to check > if log_tbl falls in any of the ranges and if the range is E820_TYPE_RAM. If that condition is satisfied, > then you have to split that range into 3. i.e. the E820_TYPE_RAM range before tpm_log, the tpm_log > E820_TYPE_RESERVED range, and the E820_TYPE_RAM range after. There are no helper functions, so this > splitting involves playing with a lot of pointers, and it looks quite ugly. I believe doing this > way is more likely to introduce bugs. > > If we are having to compensate for an EFI bug, would it make sense to do it in the way done > in RFC and do it in kernel rather than libstub? It is simple and very likely to be bug free. > I don't see how this could be an EFI bug, given that it does not deal with E820 tables at all.
Hello Ard, On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > I don't see how this could be an EFI bug, given that it does not deal > with E820 tables at all. I want to back up a little bit and make sure I am following the discussion. From what I understand from previous discussion, we have an EFI bug as the root cause of this issue. This happens because the EFI does NOT mark the EFI TPM event log memory region as reserved (EFI_RESERVED_TYPE). Not having an entry for the event table memory in EFI memory mapped, then libstub will ignore it completely (the TPM event log memory range) and not populate e820 table with it. Once the e820 table does not have the memory range for TPM event log, then the kernel is free to overwrite that memory region, causing corruptions all across the board. From what I understand from the thread discussion, there are three ways to "solve" it: 1) Fix the EFI to pass the TPM event log memory as reserved. 2) Workaround it in libstub, and considering the TPM event log memory range when populating the e820 table. (As proposed in https://lore.kernel.org/all/2542182d-aa79-4705-91b6-fa593bacffa6@gmail.com/) 3) Workaround in later in the kernel, as proposed in https://lore.kernel.org/all/20240911104109.1831501-1-usamaarif642@gmail.com/ Please let me know if my understanding is flawed here. Thank you! --breno
On Thu, 2024-09-12 at 06:03 -0700, Breno Leitao wrote: > Hello Ard, > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > > I don't see how this could be an EFI bug, given that it does not > > deal with E820 tables at all. > > I want to back up a little bit and make sure I am following the > discussion. > > From what I understand from previous discussion, we have an EFI bug > as the root cause of this issue. > > This happens because the EFI does NOT mark the EFI TPM event log > memory region as reserved (EFI_RESERVED_TYPE). Not having an entry > for the event table memory in EFI memory mapped, then libstub will > ignore it completely (the TPM event log memory range) and not > populate e820 table with it. Wait, that's not correct. The TPM log is in memory that doesn't survive ExitBootServices (by design in case the OS doesn't care about it). So the EFI stub actually copies it over to a new configuration table that is in reserved memory before it calls ExitBootServices. This new copy should be in kernel reserved memory regardless of its e820 map status. Regards, James
Hello James, On Thu, Sep 12, 2024 at 12:22:01PM -0400, James Bottomley wrote: > On Thu, 2024-09-12 at 06:03 -0700, Breno Leitao wrote: > > Hello Ard, > > > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > > > I don't see how this could be an EFI bug, given that it does not > > > deal with E820 tables at all. > > > > I want to back up a little bit and make sure I am following the > > discussion. > > > > From what I understand from previous discussion, we have an EFI bug > > as the root cause of this issue. > > > > This happens because the EFI does NOT mark the EFI TPM event log > > memory region as reserved (EFI_RESERVED_TYPE). Not having an entry > > for the event table memory in EFI memory mapped, then libstub will > > ignore it completely (the TPM event log memory range) and not > > populate e820 table with it. > > Wait, that's not correct. The TPM log is in memory that doesn't > survive ExitBootServices (by design in case the OS doesn't care about > it). So the EFI stub actually copies it over to a new configuration > table that is in reserved memory before it calls ExitBootServices. > This new copy should be in kernel reserved memory regardless of its > e820 map status. First of all, thanks for clarifying some points here. How should the TPM log table be passed to the next kernel when kexecing() since it didn't surive ExitBootServices?
On Fri, 2024-09-13 at 04:57 -0700, Breno Leitao wrote: > Hello James, > > On Thu, Sep 12, 2024 at 12:22:01PM -0400, James Bottomley wrote: > > On Thu, 2024-09-12 at 06:03 -0700, Breno Leitao wrote: > > > Hello Ard, > > > > > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > > > > I don't see how this could be an EFI bug, given that it does > > > > not deal with E820 tables at all. > > > > > > I want to back up a little bit and make sure I am following the > > > discussion. > > > > > > From what I understand from previous discussion, we have an EFI > > > bug as the root cause of this issue. > > > > > > This happens because the EFI does NOT mark the EFI TPM event log > > > memory region as reserved (EFI_RESERVED_TYPE). Not having an > > > entry for the event table memory in EFI memory mapped, then > > > libstub will ignore it completely (the TPM event log memory > > > range) and not populate e820 table with it. > > > > Wait, that's not correct. The TPM log is in memory that doesn't > > survive ExitBootServices (by design in case the OS doesn't care > > about it). So the EFI stub actually copies it over to a new > > configuration table that is in reserved memory before it calls > > ExitBootServices. This new copy should be in kernel reserved > > memory regardless of its e820 map status. > > First of all, thanks for clarifying some points here. > > How should the TPM log table be passed to the next kernel when > kexecing() since it didn't surive ExitBootServices? I've no idea. I'm assuming you don't elaborately reconstruct the EFI boot services, so you can't enter the EFI boot stub before ExitBootServices is called? So I'd guess you want to preserve the EFI table that copied the TPM data in to kernel memory. James
James Bottomley <James.Bottomley@HansenPartnership.com> writes: > On Fri, 2024-09-13 at 04:57 -0700, Breno Leitao wrote: >> Hello James, >> >> On Thu, Sep 12, 2024 at 12:22:01PM -0400, James Bottomley wrote: >> > On Thu, 2024-09-12 at 06:03 -0700, Breno Leitao wrote: >> > > Hello Ard, >> > > >> > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: >> > > > I don't see how this could be an EFI bug, given that it does >> > > > not deal with E820 tables at all. >> > > >> > > I want to back up a little bit and make sure I am following the >> > > discussion. >> > > >> > > From what I understand from previous discussion, we have an EFI >> > > bug as the root cause of this issue. >> > > >> > > This happens because the EFI does NOT mark the EFI TPM event log >> > > memory region as reserved (EFI_RESERVED_TYPE). Not having an >> > > entry for the event table memory in EFI memory mapped, then >> > > libstub will ignore it completely (the TPM event log memory >> > > range) and not populate e820 table with it. >> > >> > Wait, that's not correct. The TPM log is in memory that doesn't >> > survive ExitBootServices (by design in case the OS doesn't care >> > about it). So the EFI stub actually copies it over to a new >> > configuration table that is in reserved memory before it calls >> > ExitBootServices. This new copy should be in kernel reserved >> > memory regardless of its e820 map status. >> >> First of all, thanks for clarifying some points here. >> >> How should the TPM log table be passed to the next kernel when >> kexecing() since it didn't surive ExitBootServices? > > I've no idea. I'm assuming you don't elaborately reconstruct the EFI > boot services, so you can't enter the EFI boot stub before > ExitBootServices is called? So I'd guess you want to preserve the EFI > table that copied the TPM data in to kernel memory. This leaves two practical questions if I have been following everything correctly. 1) How to get kexec to avoid picking that memory for the new kernel to run in before it initializes itself. (AKA the getting stomped by relocate kernel problem). 2) How to point the new kernel to preserved tpm_log. This recommendation is from memory so it may be a bit off but the general structure should work. The idea is as follows. - Pass the information between kernels. It is probably simplest for the kernel to have a command line option that tells the kernel the address and size of the tpm_log. We have a couple of mechanisms here. Assuming you are loading a bzImage with kexec_file_load you should be able to have the in kernel loader to add those arguments to the kernel command line. - Ensure that when the loader is finding an address to load the new kernel it treats the address of the tpm_log as unavailable. Does that sound like a doable plan? Eric
Hi Eric, Thanks for chiming in. On Mon, 16 Sept 2024 at 22:21, Eric W. Biederman <ebiederm@xmission.com> wrote: > > James Bottomley <James.Bottomley@HansenPartnership.com> writes: > > > On Fri, 2024-09-13 at 04:57 -0700, Breno Leitao wrote: > >> Hello James, > >> > >> On Thu, Sep 12, 2024 at 12:22:01PM -0400, James Bottomley wrote: > >> > On Thu, 2024-09-12 at 06:03 -0700, Breno Leitao wrote: > >> > > Hello Ard, > >> > > > >> > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > >> > > > I don't see how this could be an EFI bug, given that it does > >> > > > not deal with E820 tables at all. > >> > > > >> > > I want to back up a little bit and make sure I am following the > >> > > discussion. > >> > > > >> > > From what I understand from previous discussion, we have an EFI > >> > > bug as the root cause of this issue. > >> > > > >> > > This happens because the EFI does NOT mark the EFI TPM event log > >> > > memory region as reserved (EFI_RESERVED_TYPE). Not having an > >> > > entry for the event table memory in EFI memory mapped, then > >> > > libstub will ignore it completely (the TPM event log memory > >> > > range) and not populate e820 table with it. > >> > > >> > Wait, that's not correct. The TPM log is in memory that doesn't > >> > survive ExitBootServices (by design in case the OS doesn't care > >> > about it). So the EFI stub actually copies it over to a new > >> > configuration table that is in reserved memory before it calls > >> > ExitBootServices. This new copy should be in kernel reserved > >> > memory regardless of its e820 map status. > >> > >> First of all, thanks for clarifying some points here. > >> > >> How should the TPM log table be passed to the next kernel when > >> kexecing() since it didn't surive ExitBootServices? > > > > I've no idea. I'm assuming you don't elaborately reconstruct the EFI > > boot services, so you can't enter the EFI boot stub before > > ExitBootServices is called? So I'd guess you want to preserve the EFI > > table that copied the TPM data in to kernel memory. > > This leaves two practical questions if I have been following everything > correctly. > > 1) How to get kexec to avoid picking that memory for the new kernel to > run in before it initializes itself. (AKA the getting stomped by > relocate kernel problem). > > 2) How to point the new kernel to preserved tpm_log. > > > This recommendation is from memory so it may be a bit off but > the general structure should work. The idea is as follows. > > - Pass the information between kernels. > > It is probably simplest for the kernel to have a command line option > that tells the kernel the address and size of the tpm_log. > > We have a couple of mechanisms here. Assuming you are loading a > bzImage with kexec_file_load you should be able to have the in kernel > loader to add those arguments to the kernel command line. > This shouldn't be necessary, and I think it is actively harmful to keep inventing special ways for the kexec kernel to learn about these things that deviate from the methods used by the first kernel. This is how we ended up with 5 sources of truth for the physical memory map (EFI memory map, memblock and 3 different versions of the e820 memory map). We should try very hard to make kexec idempotent, and reuse the existing methods where possible. In this case, the EFI configuration table is already being exposed to the kexec kernel, which describes the base of the allocation. The size of the allocation can be derived from the table header. > - Ensure that when the loader is finding an address to load the new > kernel it treats the address of the tpm_log as unavailable. > The TPM log is a table created by the EFI stub loader, which is part of the kernel. So if we need to tweak this for kexec's benefit, I'd prefer changing it in a way that can accommodate the first kernel too. However, I think the current method already has that property so I don't think we need to do anything (modulo fixing the bug) That said, I am doubtful that the kexec kernel can make meaningful use of the TPM log to begin with, given that the TPM will be out of sync at this point. But it is still better to keep it for symmetry, letting the higher level kexec/kdump logic running in user space reason about whether the TPM log has any value to it. -- Ard.
Ard Biesheuvel <ardb@kernel.org> writes: > Hi Eric, > > Thanks for chiming in. It just looked like after James gave some expert input the conversation got stuck, so I am just trying to move it along. I don't think anyone knows what this whole elephant looks like, which makes solving the problem tricky. > On Mon, 16 Sept 2024 at 22:21, Eric W. Biederman <ebiederm@xmission.com> wrote: >> >> James Bottomley <James.Bottomley@HansenPartnership.com> writes: >> >> > On Fri, 2024-09-13 at 04:57 -0700, Breno Leitao wrote: >> >> Hello James, >> >> >> >> On Thu, Sep 12, 2024 at 12:22:01PM -0400, James Bottomley wrote: >> >> > On Thu, 2024-09-12 at 06:03 -0700, Breno Leitao wrote: >> >> > > Hello Ard, >> >> > > >> >> > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: >> >> > > > I don't see how this could be an EFI bug, given that it does >> >> > > > not deal with E820 tables at all. >> >> > > >> >> > > I want to back up a little bit and make sure I am following the >> >> > > discussion. >> >> > > >> >> > > From what I understand from previous discussion, we have an EFI >> >> > > bug as the root cause of this issue. >> >> > > >> >> > > This happens because the EFI does NOT mark the EFI TPM event log >> >> > > memory region as reserved (EFI_RESERVED_TYPE). Not having an >> >> > > entry for the event table memory in EFI memory mapped, then >> >> > > libstub will ignore it completely (the TPM event log memory >> >> > > range) and not populate e820 table with it. >> >> > >> >> > Wait, that's not correct. The TPM log is in memory that doesn't >> >> > survive ExitBootServices (by design in case the OS doesn't care >> >> > about it). So the EFI stub actually copies it over to a new >> >> > configuration table that is in reserved memory before it calls >> >> > ExitBootServices. This new copy should be in kernel reserved >> >> > memory regardless of its e820 map status. >> >> >> >> First of all, thanks for clarifying some points here. >> >> >> >> How should the TPM log table be passed to the next kernel when >> >> kexecing() since it didn't surive ExitBootServices? >> > >> > I've no idea. I'm assuming you don't elaborately reconstruct the EFI >> > boot services, so you can't enter the EFI boot stub before >> > ExitBootServices is called? So I'd guess you want to preserve the EFI >> > table that copied the TPM data in to kernel memory. >> >> This leaves two practical questions if I have been following everything >> correctly. >> >> 1) How to get kexec to avoid picking that memory for the new kernel to >> run in before it initializes itself. (AKA the getting stomped by >> relocate kernel problem). >> >> 2) How to point the new kernel to preserved tpm_log. >> >> >> This recommendation is from memory so it may be a bit off but >> the general structure should work. The idea is as follows. >> >> - Pass the information between kernels. >> >> It is probably simplest for the kernel to have a command line option >> that tells the kernel the address and size of the tpm_log. >> >> We have a couple of mechanisms here. Assuming you are loading a >> bzImage with kexec_file_load you should be able to have the in kernel >> loader to add those arguments to the kernel command line. >> > > This shouldn't be necessary, and I think it is actively harmful to > keep inventing special ways for the kexec kernel to learn about these > things that deviate from the methods used by the first kernel. This is > how we ended up with 5 sources of truth for the physical memory map > (EFI memory map, memblock and 3 different versions of the e820 memory > map). > > We should try very hard to make kexec idempotent, and reuse the > existing methods where possible. In this case, the EFI configuration > table is already being exposed to the kexec kernel, which describes > the base of the allocation. The size of the allocation can be derived > from the table header. > >> - Ensure that when the loader is finding an address to load the new >> kernel it treats the address of the tpm_log as unavailable. >> > > The TPM log is a table created by the EFI stub loader, which is part > of the kernel. So if we need to tweak this for kexec's benefit, I'd > prefer changing it in a way that can accommodate the first kernel too. > However, I think the current method already has that property so I > don't think we need to do anything (modulo fixing the bug) I am fine with not inventing a new mechanism, but I think we need to reuse whatever mechanism the stub loader uses to pass it's table to the kernel. Not the EFI table that disappears at ExitBootServices(). > That said, I am doubtful that the kexec kernel can make meaningful use > of the TPM log to begin with, given that the TPM will be out of sync > at this point. But it is still better to keep it for symmetry, letting > the higher level kexec/kdump logic running in user space reason about > whether the TPM log has any value to it. Someone seems to think so or there would not be a complaint that it is getting corrupted. This should not be the kexec-on-panic kernel as that runs in memory that is reserved solely for it's own use. So we are talking something like using kexec as a bootloader. Eric
On Tue, 17 Sept 2024 at 17:24, Eric W. Biederman <ebiederm@xmission.com> wrote: > > Ard Biesheuvel <ardb@kernel.org> writes: > > > Hi Eric, > > > > Thanks for chiming in. > > It just looked like after James gave some expert input the > conversation got stuck, so I am just trying to move it along. > > I don't think anyone knows what this whole elephant looks like, > which makes solving the problem tricky. > > > On Mon, 16 Sept 2024 at 22:21, Eric W. Biederman <ebiederm@xmission.com> wrote: > >> ... > >> > >> This leaves two practical questions if I have been following everything > >> correctly. > >> > >> 1) How to get kexec to avoid picking that memory for the new kernel to > >> run in before it initializes itself. (AKA the getting stomped by > >> relocate kernel problem). > >> > >> 2) How to point the new kernel to preserved tpm_log. > >> > >> > >> This recommendation is from memory so it may be a bit off but > >> the general structure should work. The idea is as follows. > >> > >> - Pass the information between kernels. > >> > >> It is probably simplest for the kernel to have a command line option > >> that tells the kernel the address and size of the tpm_log. > >> > >> We have a couple of mechanisms here. Assuming you are loading a > >> bzImage with kexec_file_load you should be able to have the in kernel > >> loader to add those arguments to the kernel command line. > >> > > > > This shouldn't be necessary, and I think it is actively harmful to > > keep inventing special ways for the kexec kernel to learn about these > > things that deviate from the methods used by the first kernel. This is > > how we ended up with 5 sources of truth for the physical memory map > > (EFI memory map, memblock and 3 different versions of the e820 memory > > map). > > > > We should try very hard to make kexec idempotent, and reuse the > > existing methods where possible. In this case, the EFI configuration > > table is already being exposed to the kexec kernel, which describes > > the base of the allocation. The size of the allocation can be derived > > from the table header. > > > >> - Ensure that when the loader is finding an address to load the new > >> kernel it treats the address of the tpm_log as unavailable. > >> > > > > The TPM log is a table created by the EFI stub loader, which is part > > of the kernel. So if we need to tweak this for kexec's benefit, I'd > > prefer changing it in a way that can accommodate the first kernel too. > > However, I think the current method already has that property so I > > don't think we need to do anything (modulo fixing the bug) > > I am fine with not inventing a new mechanism, but I think we need > to reuse whatever mechanism the stub loader uses to pass it's > table to the kernel. Not the EFI table that disappears at > ExitBootServices(). > Not sure what you mean here - the EFI table that gets clobbered by kexec *is* the table that is created by the stub loader to pass the TPM log to the kernel. Not sure what alternative you have in mind here. > > That said, I am doubtful that the kexec kernel can make meaningful use > > of the TPM log to begin with, given that the TPM will be out of sync > > at this point. But it is still better to keep it for symmetry, letting > > the higher level kexec/kdump logic running in user space reason about > > whether the TPM log has any value to it. > > Someone seems to think so or there would not be a complaint that it is > getting corrupted. > No. The problem is that the size of the table is *in* the table, and so if it gets corrupted, the code that attempts to memblock_reserve() it goes off into the weeds. But that does not imply there is a point to having access to this table from a kexec kernel in the first place. > This should not be the kexec-on-panic kernel as that runs in memory > that is reserved solely for it's own use. So we are talking something > like using kexec as a bootloader. > kexec as a bootloader under TPM based measured boot will need to do a lot more than pass the firmware's event log to the next kernel. I'd expect a properly engineered kexec to replace this table entirely, and include the hashes of the assets it has loaded and measured into the respective PCRs. But let's stick to solving the actual issue here, rather than philosophize on how kexec might work in this context.
Ard Biesheuvel <ardb@kernel.org> writes: > On Tue, 17 Sept 2024 at 17:24, Eric W. Biederman <ebiederm@xmission.com> wrote: >> >> Ard Biesheuvel <ardb@kernel.org> writes: >> >> > Hi Eric, >> > >> > Thanks for chiming in. >> >> It just looked like after James gave some expert input the >> conversation got stuck, so I am just trying to move it along. >> >> I don't think anyone knows what this whole elephant looks like, >> which makes solving the problem tricky. >> >> > On Mon, 16 Sept 2024 at 22:21, Eric W. Biederman <ebiederm@xmission.com> wrote: >> >> > ... >> >> >> >> This leaves two practical questions if I have been following everything >> >> correctly. >> >> >> >> 1) How to get kexec to avoid picking that memory for the new kernel to >> >> run in before it initializes itself. (AKA the getting stomped by >> >> relocate kernel problem). >> >> >> >> 2) How to point the new kernel to preserved tpm_log. >> >> >> >> >> >> This recommendation is from memory so it may be a bit off but >> >> the general structure should work. The idea is as follows. >> >> >> >> - Pass the information between kernels. >> >> >> >> It is probably simplest for the kernel to have a command line option >> >> that tells the kernel the address and size of the tpm_log. >> >> >> >> We have a couple of mechanisms here. Assuming you are loading a >> >> bzImage with kexec_file_load you should be able to have the in kernel >> >> loader to add those arguments to the kernel command line. >> >> >> > >> > This shouldn't be necessary, and I think it is actively harmful to >> > keep inventing special ways for the kexec kernel to learn about these >> > things that deviate from the methods used by the first kernel. This is >> > how we ended up with 5 sources of truth for the physical memory map >> > (EFI memory map, memblock and 3 different versions of the e820 memory >> > map). >> > >> > We should try very hard to make kexec idempotent, and reuse the >> > existing methods where possible. In this case, the EFI configuration >> > table is already being exposed to the kexec kernel, which describes >> > the base of the allocation. The size of the allocation can be derived >> > from the table header. >> > >> >> - Ensure that when the loader is finding an address to load the new >> >> kernel it treats the address of the tpm_log as unavailable. >> >> >> > >> > The TPM log is a table created by the EFI stub loader, which is part >> > of the kernel. So if we need to tweak this for kexec's benefit, I'd >> > prefer changing it in a way that can accommodate the first kernel too. >> > However, I think the current method already has that property so I >> > don't think we need to do anything (modulo fixing the bug) >> >> I am fine with not inventing a new mechanism, but I think we need >> to reuse whatever mechanism the stub loader uses to pass it's >> table to the kernel. Not the EFI table that disappears at >> ExitBootServices(). >> > > Not sure what you mean here - the EFI table that gets clobbered by > kexec *is* the table that is created by the stub loader to pass the > TPM log to the kernel. Not sure what alternative you have in mind > here. I was referring to whatever the EFI table that James Bottomley mentioned that I presume the stub loader reads from when the stub loader constructs the tpm_log that is passed to the kernel. So I believe we are in agreement of everything except terminology. >> > That said, I am doubtful that the kexec kernel can make meaningful use >> > of the TPM log to begin with, given that the TPM will be out of sync >> > at this point. But it is still better to keep it for symmetry, letting >> > the higher level kexec/kdump logic running in user space reason about >> > whether the TPM log has any value to it. >> >> Someone seems to think so or there would not be a complaint that it is >> getting corrupted. >> > > No. The problem is that the size of the table is *in* the table, and > so if it gets corrupted, the code that attempts to memblock_reserve() > it goes off into the weeds. But that does not imply there is a point > to having access to this table from a kexec kernel in the first place. If there is no point to having access to it then we should just not pass anything to the loaded kernel, so the kernel does not think there is anything there. >> This should not be the kexec-on-panic kernel as that runs in memory >> that is reserved solely for it's own use. So we are talking something >> like using kexec as a bootloader. >> > > kexec as a bootloader under TPM based measured boot will need to do a > lot more than pass the firmware's event log to the next kernel. I'd > expect a properly engineered kexec to replace this table entirely, and > include the hashes of the assets it has loaded and measured into the > respective PCRs. > > But let's stick to solving the actual issue here, rather than > philosophize on how kexec might work in this context. I am fine with that. The complaint I had seen was that the table was being corrupted and asking how to solve that. It seems I haven't read the part of the conversation where it was made clear that no one wants the tpm_log after kexec. If someone wants the tpm_log then we need to solve this problem. Eric
On Wed, 18 Sept 2024 at 05:14, Eric W. Biederman <ebiederm@xmission.com> wrote: > > Ard Biesheuvel <ardb@kernel.org> writes: > > > On Tue, 17 Sept 2024 at 17:24, Eric W. Biederman <ebiederm@xmission.com> wrote: > >> > >> Ard Biesheuvel <ardb@kernel.org> writes: > >> > >> > Hi Eric, > >> > > >> > Thanks for chiming in. > >> > >> It just looked like after James gave some expert input the > >> conversation got stuck, so I am just trying to move it along. > >> > >> I don't think anyone knows what this whole elephant looks like, > >> which makes solving the problem tricky. > >> > >> > On Mon, 16 Sept 2024 at 22:21, Eric W. Biederman <ebiederm@xmission.com> wrote: > >> >> > > ... > >> >> > >> >> This leaves two practical questions if I have been following everything > >> >> correctly. > >> >> > >> >> 1) How to get kexec to avoid picking that memory for the new kernel to > >> >> run in before it initializes itself. (AKA the getting stomped by > >> >> relocate kernel problem). > >> >> > >> >> 2) How to point the new kernel to preserved tpm_log. > >> >> > >> >> > >> >> This recommendation is from memory so it may be a bit off but > >> >> the general structure should work. The idea is as follows. > >> >> > >> >> - Pass the information between kernels. > >> >> > >> >> It is probably simplest for the kernel to have a command line option > >> >> that tells the kernel the address and size of the tpm_log. > >> >> > >> >> We have a couple of mechanisms here. Assuming you are loading a > >> >> bzImage with kexec_file_load you should be able to have the in kernel > >> >> loader to add those arguments to the kernel command line. > >> >> > >> > > >> > This shouldn't be necessary, and I think it is actively harmful to > >> > keep inventing special ways for the kexec kernel to learn about these > >> > things that deviate from the methods used by the first kernel. This is > >> > how we ended up with 5 sources of truth for the physical memory map > >> > (EFI memory map, memblock and 3 different versions of the e820 memory > >> > map). > >> > > >> > We should try very hard to make kexec idempotent, and reuse the > >> > existing methods where possible. In this case, the EFI configuration > >> > table is already being exposed to the kexec kernel, which describes > >> > the base of the allocation. The size of the allocation can be derived > >> > from the table header. > >> > > >> >> - Ensure that when the loader is finding an address to load the new > >> >> kernel it treats the address of the tpm_log as unavailable. > >> >> > >> > > >> > The TPM log is a table created by the EFI stub loader, which is part > >> > of the kernel. So if we need to tweak this for kexec's benefit, I'd > >> > prefer changing it in a way that can accommodate the first kernel too. > >> > However, I think the current method already has that property so I > >> > don't think we need to do anything (modulo fixing the bug) > >> > >> I am fine with not inventing a new mechanism, but I think we need > >> to reuse whatever mechanism the stub loader uses to pass it's > >> table to the kernel. Not the EFI table that disappears at > >> ExitBootServices(). > >> > > > > Not sure what you mean here - the EFI table that gets clobbered by > > kexec *is* the table that is created by the stub loader to pass the > > TPM log to the kernel. Not sure what alternative you have in mind > > here. > > I was referring to whatever the EFI table that James Bottomley mentioned > that I presume the stub loader reads from when the stub loader > constructs the tpm_log that is passed to the kernel. > There is no such table. The event log is exposed by the firmware via a TCG2 protocol interface, which is no longer available after boot. So the stub loader (which is the last kernel component that has access to this interface) invokes this protocol and copies the output into a table in memory which is exposed to the kernel proper as a EFI configuration table. So the main issue here is that EFI configuration tables are passed on to kexec kernels, and we have to ensure (in the general case) that the associated memory is not reused. The implication is that the stub loader should always use EFI_ACPI_RECLAIM_MEMORY for allocations that are referenced via EFI configuration tables, and doing so for this table makes the bug go away. > So I believe we are in agreement of everything except terminology. > Sure. > >> > That said, I am doubtful that the kexec kernel can make meaningful use > >> > of the TPM log to begin with, given that the TPM will be out of sync > >> > at this point. But it is still better to keep it for symmetry, letting > >> > the higher level kexec/kdump logic running in user space reason about > >> > whether the TPM log has any value to it. > >> > >> Someone seems to think so or there would not be a complaint that it is > >> getting corrupted. > >> > > > > No. The problem is that the size of the table is *in* the table, and > > so if it gets corrupted, the code that attempts to memblock_reserve() > > it goes off into the weeds. But that does not imply there is a point > > to having access to this table from a kexec kernel in the first place. > > If there is no point to having access to it then we should just not > pass anything to the loaded kernel, so the kernel does not think there > is anything there. > > >> This should not be the kexec-on-panic kernel as that runs in memory > >> that is reserved solely for it's own use. So we are talking something > >> like using kexec as a bootloader. > >> > > > > kexec as a bootloader under TPM based measured boot will need to do a > > lot more than pass the firmware's event log to the next kernel. I'd > > expect a properly engineered kexec to replace this table entirely, and > > include the hashes of the assets it has loaded and measured into the > > respective PCRs. > > > > But let's stick to solving the actual issue here, rather than > > philosophize on how kexec might work in this context. > > > I am fine with that. The complaint I had seen was that the table was > being corrupted and asking how to solve that. It seems I haven't read > the part of the conversation where it was made clear that no one wants > the tpm_log after kexec. > It was not made clear, that is why I raised the question. I argued that the TPM log has limited utility after a kexec, given that we will be in one of two situations: - the kexec boot chain cares about the TPM and measured boot, and will therefore have extended the TPM's PCRs and the TPM log will be out of sync; - the kexec boot chain does not care, and so there is no point in forwarding the TPM log. Perhaps there is a third case where kdump wants to inspect the TPM log that the crashed kernel accessed? But this is rather speculative. > If someone wants the tpm_log then we need to solve this problem. > Agreed.
On Wed, Sep 18, 2024 at 09:36:06AM +0200, Ard Biesheuvel wrote: > On Wed, 18 Sept 2024 at 05:14, Eric W. Biederman <ebiederm@xmission.com> wrote: > > Ard Biesheuvel <ardb@kernel.org> writes: > > > On Tue, 17 Sept 2024 at 17:24, Eric W. Biederman <ebiederm@xmission.com> wrote: > > >> Ard Biesheuvel <ardb@kernel.org> writes: > > >> This should not be the kexec-on-panic kernel as that runs in memory > > >> that is reserved solely for it's own use. So we are talking something > > >> like using kexec as a bootloader. > > > > > > kexec as a bootloader under TPM based measured boot will need to do a > > > lot more than pass the firmware's event log to the next kernel. I'd > > > expect a properly engineered kexec to replace this table entirely, and > > > include the hashes of the assets it has loaded and measured into the > > > respective PCRs. > > > > > > But let's stick to solving the actual issue here, rather than > > > philosophize on how kexec might work in this context. > > > > I am fine with that. The complaint I had seen was that the table was > > being corrupted and asking how to solve that. It seems I haven't read > > the part of the conversation where it was made clear that no one wants > > the tpm_log after kexec. > > > It was not made clear, that is why I raised the question. I argued > that the TPM log has limited utility after a kexec, given that we will > be in one of two situations: > - the kexec boot chain cares about the TPM and measured boot, and will > therefore have extended the TPM's PCRs and the TPM log will be out of > sync; > - the kexec boot chain does not care, and so there is no point in > forwarding the TPM log. > > Perhaps there is a third case where kdump wants to inspect the TPM log > that the crashed kernel accessed? But this is rather speculative. Generally the kernel/host OS and the firmware are touching different PCRs in the TPM. The firmware eventlog covers what the firmware/bootloader measured; itself, option ROMs, secure boot details, bootloader, initial kernel/initrd (if we're talking grub as the initial bootloader). These details are not changed by a kexec, and provide the anchor of the software trust chain. A kexec'd kernel will generally not touch the same PCRs. The primary way I know to carry kexec measurements / logs over to new kernels is using IMA, which will be configured to use one of the later PCRs (default of 10). That means that the firmware event log is still completely valid to subsequent kernels, and is still required to validate the firmware/bootloader trust chain. You then probably _also_ want to make use of the IMA log to validate the kexec'd kernel chain, but that's separate. > > If someone wants the tpm_log then we need to solve this problem. > Agreed. There's a real requirement and use for kexec'd kernels to be able to continue to access the firmware TPM event log; to the extent that there are also patches floating around to have this carried over via device tree on non-UEFI platforms. J. -- Avoid temporary variables and strange women. This .sig brought to you by the letter U and the number 37 Product of the Republic of HuggieTag
On Wed, Oct 09, 2024 at 10:10:57AM +0100, Jonathan McDowell wrote: > On Wed, Sep 18, 2024 at 09:36:06AM +0200, Ard Biesheuvel wrote: > > On Wed, 18 Sept 2024 at 05:14, Eric W. Biederman <ebiederm@xmission.com> wrote: > > > Ard Biesheuvel <ardb@kernel.org> writes: > > > > On Tue, 17 Sept 2024 at 17:24, Eric W. Biederman <ebiederm@xmission.com> wrote: > > > >> Ard Biesheuvel <ardb@kernel.org> writes: > > > > >> This should not be the kexec-on-panic kernel as that runs in memory > > > >> that is reserved solely for it's own use. So we are talking something > > > >> like using kexec as a bootloader. > > > > > > > > kexec as a bootloader under TPM based measured boot will need to do a > > > > lot more than pass the firmware's event log to the next kernel. I'd > > > > expect a properly engineered kexec to replace this table entirely, and > > > > include the hashes of the assets it has loaded and measured into the > > > > respective PCRs. > > > > > > > > But let's stick to solving the actual issue here, rather than > > > > philosophize on how kexec might work in this context. > > > > > > I am fine with that. The complaint I had seen was that the table was > > > being corrupted and asking how to solve that. It seems I haven't read > > > the part of the conversation where it was made clear that no one wants > > > the tpm_log after kexec. > > > > > It was not made clear, that is why I raised the question. I argued > > that the TPM log has limited utility after a kexec, given that we will > > be in one of two situations: > > - the kexec boot chain cares about the TPM and measured boot, and will > > therefore have extended the TPM's PCRs and the TPM log will be out of > > sync; > > - the kexec boot chain does not care, and so there is no point in > > forwarding the TPM log. > > > > Perhaps there is a third case where kdump wants to inspect the TPM log > > that the crashed kernel accessed? But this is rather speculative. > > Generally the kernel/host OS and the firmware are touching different > PCRs in the TPM. > > The firmware eventlog covers what the firmware/bootloader measured; > itself, option ROMs, secure boot details, bootloader, initial > kernel/initrd (if we're talking grub as the initial bootloader). These > details are not changed by a kexec, and provide the anchor of the > software trust chain. > > A kexec'd kernel will generally not touch the same PCRs. The primary way > I know to carry kexec measurements / logs over to new kernels is using > IMA, which will be configured to use one of the later PCRs (default of > 10). What about in the case where you don't have Grub, but, use the kernel as the bootloader, kexecing into the desired kernel? Will the bootloader-kernel touch the same PCRs as GRUB, or it will only touch PCRs above 10? Thanks --breno
On Wed, Oct 09, 2024 at 03:46:32AM -0700, Breno Leitao wrote: > On Wed, Oct 09, 2024 at 10:10:57AM +0100, Jonathan McDowell wrote: > > On Wed, Sep 18, 2024 at 09:36:06AM +0200, Ard Biesheuvel wrote: > > > On Wed, 18 Sept 2024 at 05:14, Eric W. Biederman <ebiederm@xmission.com> wrote: > > > > Ard Biesheuvel <ardb@kernel.org> writes: > > > > > On Tue, 17 Sept 2024 at 17:24, Eric W. Biederman <ebiederm@xmission.com> wrote: > > > > >> Ard Biesheuvel <ardb@kernel.org> writes: > > > > > > >> This should not be the kexec-on-panic kernel as that runs in memory > > > > >> that is reserved solely for it's own use. So we are talking something > > > > >> like using kexec as a bootloader. > > > > > > > > > > kexec as a bootloader under TPM based measured boot will need to do a > > > > > lot more than pass the firmware's event log to the next kernel. I'd > > > > > expect a properly engineered kexec to replace this table entirely, and > > > > > include the hashes of the assets it has loaded and measured into the > > > > > respective PCRs. > > > > > > > > > > But let's stick to solving the actual issue here, rather than > > > > > philosophize on how kexec might work in this context. > > > > > > > > I am fine with that. The complaint I had seen was that the table was > > > > being corrupted and asking how to solve that. It seems I haven't read > > > > the part of the conversation where it was made clear that no one wants > > > > the tpm_log after kexec. > > > > > > > It was not made clear, that is why I raised the question. I argued > > > that the TPM log has limited utility after a kexec, given that we will > > > be in one of two situations: > > > - the kexec boot chain cares about the TPM and measured boot, and will > > > therefore have extended the TPM's PCRs and the TPM log will be out of > > > sync; > > > - the kexec boot chain does not care, and so there is no point in > > > forwarding the TPM log. > > > > > > Perhaps there is a third case where kdump wants to inspect the TPM log > > > that the crashed kernel accessed? But this is rather speculative. > > > > Generally the kernel/host OS and the firmware are touching different > > PCRs in the TPM. > > > > The firmware eventlog covers what the firmware/bootloader measured; > > itself, option ROMs, secure boot details, bootloader, initial > > kernel/initrd (if we're talking grub as the initial bootloader). These > > details are not changed by a kexec, and provide the anchor of the > > software trust chain. > > > > A kexec'd kernel will generally not touch the same PCRs. The primary way > > I know to carry kexec measurements / logs over to new kernels is using > > IMA, which will be configured to use one of the later PCRs (default of > > 10). > > What about in the case where you don't have Grub, but, use the kernel as > the bootloader, kexecing into the desired kernel? > > Will the bootloader-kernel touch the same PCRs as GRUB, or it will only > touch PCRs above 10? A kernel kexecing into another will generally use IMA if it wants to measure into the TPM, which will use PCR 10 by default and not conflict with the firmware PCRs (and you then use the IMA integrity log, which is passed over a kexec, to work out the kexec side of things). You still need the firmware event log in that case because the "bootloader" kernel combo you load is measured into the TPM by the firmware. You _could_ technically configure things up to re-use some of the firmware PCRs, but it generally wouldn't make a lot of sense to do so and I've not seen any examples of that sort of configuration. J. -- 101 things you can't have too much of : 41 - Tea.
On Thu, 12 Sept 2024 at 15:03, Breno Leitao <leitao@debian.org> wrote: > > Hello Ard, > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > > I don't see how this could be an EFI bug, given that it does not deal > > with E820 tables at all. > > I want to back up a little bit and make sure I am following the > discussion. > > From what I understand from previous discussion, we have an EFI bug as > the root cause of this issue. > > This happens because the EFI does NOT mark the EFI TPM event log memory > region as reserved (EFI_RESERVED_TYPE). Why do you think EFI should use EFI_RESERVED_TYPE in this case? The EFI spec is very clear that EFI_RESERVED_TYPE really shouldn't be used for anything by EFI itself. It is quite common for EFI configuration tables to be passed as EfiRuntimeServicesData (SMBIOS), EfiBootServicesData (ESRT) or EFiAcpiReclaim (ACPI tables). Reserved memory is mostly for memory that even the firmware does not know what it is for, i.e., particular platform specific uses. In general, it is up to the OS to ensure that EFI configuration tables that it cares about should be reserved in the correct way. > Not having an entry for the > event table memory in EFI memory mapped, then libstub will ignore it > completely (the TPM event log memory range) and not populate e820 table > with it. > > Once the e820 table does not have the memory range for TPM event log, > then the kernel is free to overwrite that memory region, causing > corruptions all across the board. > We shouldn't be relying on the E820 table for this. > From what I understand from the thread discussion, there are three ways > to "solve" it: > > 1) Fix the EFI to pass the TPM event log memory as reserved. > > 2) Workaround it in libstub, and considering the TPM event log memory > range when populating the e820 table. (As proposed in > https://lore.kernel.org/all/2542182d-aa79-4705-91b6-fa593bacffa6@gmail.com/) > > 3) Workaround in later in the kernel, as proposed in > https://lore.kernel.org/all/20240911104109.1831501-1-usamaarif642@gmail.com/ > Does the below help at all? --- a/drivers/firmware/efi/tpm.c +++ b/drivers/firmware/efi/tpm.c @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) } tbl_size = sizeof(*log_tbl) + log_tbl->size; - memblock_reserve(efi.tpm_log, tbl_size); + efi_mem_reserve(efi.tpm_log, tbl_size); if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { pr_info("TPM Final Events table not present\n");
On Thu, Sep 12, 2024 at 03:10:43PM +0200, Ard Biesheuvel wrote: > On Thu, 12 Sept 2024 at 15:03, Breno Leitao <leitao@debian.org> wrote: > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > > > I don't see how this could be an EFI bug, given that it does not deal > > > with E820 tables at all. > > > > I want to back up a little bit and make sure I am following the > > discussion. > > > > From what I understand from previous discussion, we have an EFI bug as > > the root cause of this issue. > > > > This happens because the EFI does NOT mark the EFI TPM event log memory > > region as reserved (EFI_RESERVED_TYPE). > > Why do you think EFI should use EFI_RESERVED_TYPE in this case? > > The EFI spec is very clear that EFI_RESERVED_TYPE really shouldn't be > used for anything by EFI itself. It is quite common for EFI > configuration tables to be passed as EfiRuntimeServicesData (SMBIOS), > EfiBootServicesData (ESRT) or EFiAcpiReclaim (ACPI tables). > > Reserved memory is mostly for memory that even the firmware does not > know what it is for, i.e., particular platform specific uses. > > In general, it is up to the OS to ensure that EFI configuration tables > that it cares about should be reserved in the correct way. Thanks for the explanation. So, if I understand what you meant here, the TPM event log memory range shouldn't be listed as a memory region in EFI memory map (as passed by the firmware to the OS). Hence, this is not a EFI firmware bug, but a OS/Kernel bug. Am I correct with the statements above? Another question, looking at the Spec[1] it says: If the ACPI TPM2 table contains the address and size of the Platform Firmware TCG log, firmware “pins” the memory associated with the Platform Firmware TCG log, and reports this memory as “Reserved” memory via the INT 15h/E820 interface What is the 'firmware' in the statement above, that says that reports the memory as reserved? (Is it libstub?!) Thank you so much for guiding us here, --breno [1] https://trustedcomputinggroup.org/wp-content/uploads/PC-ClientPlatform_Profile_for_TPM_2p0_Systems_v49_161114_public-review.pdf
On Thu, 12 Sept 2024 at 16:29, Breno Leitao <leitao@debian.org> wrote: > > On Thu, Sep 12, 2024 at 03:10:43PM +0200, Ard Biesheuvel wrote: > > On Thu, 12 Sept 2024 at 15:03, Breno Leitao <leitao@debian.org> wrote: > > > On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > > > > I don't see how this could be an EFI bug, given that it does not deal > > > > with E820 tables at all. > > > > > > I want to back up a little bit and make sure I am following the > > > discussion. > > > > > > From what I understand from previous discussion, we have an EFI bug as > > > the root cause of this issue. > > > > > > This happens because the EFI does NOT mark the EFI TPM event log memory > > > region as reserved (EFI_RESERVED_TYPE). > > > > Why do you think EFI should use EFI_RESERVED_TYPE in this case? > > > > The EFI spec is very clear that EFI_RESERVED_TYPE really shouldn't be > > used for anything by EFI itself. It is quite common for EFI > > configuration tables to be passed as EfiRuntimeServicesData (SMBIOS), > > EfiBootServicesData (ESRT) or EFiAcpiReclaim (ACPI tables). > > > > Reserved memory is mostly for memory that even the firmware does not > > know what it is for, i.e., particular platform specific uses. > > > > In general, it is up to the OS to ensure that EFI configuration tables > > that it cares about should be reserved in the correct way. > > Thanks for the explanation. > > So, if I understand what you meant here, the TPM event log memory range > shouldn't be listed as a memory region in EFI memory map (as passed by > the firmware to the OS). > > Hence, this is not a EFI firmware bug, but a OS/Kernel bug. > > Am I correct with the statements above? > No, not entirely. But I also missed the face that this table is actually created by the EFI stub in Linux, not the firmware. It is *not* the same memory region that the TPM2 ACPI table describes, and so what the various specs say about that is entirely irrelevant. The TPM event log configuration table is created by the EFI stub, which uses the TCG2::GetEventLog () protocol method to obtain it. This must be done by the EFI stub because these protocols will no longer be available once the OS boots. But the data is not used by the EFI stub, only by the OS, which is why it is passed in memory like this. The memory in question is allocated as EFI_LOADER_DATA, and so we are relying on the OS to know that this memory is special, and needs to be preserved. I think the solution here is to use a different memory type: --- a/drivers/firmware/efi/libstub/tpm.c +++ b/drivers/firmware/efi/libstub/tpm.c @@ -96,7 +96,7 @@ static void efi_retrieve_tcg2_eventlog(int version, efi_physical_addr_t log_loca } /* Allocate space for the logs and copy them. */ - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, sizeof(*log_tbl) + log_size, (void **)&log_tbl); if (status != EFI_SUCCESS) { which will be treated appropriately by the existing EFI-to-E820 conversion logic.
On 12/09/2024 16:21, Ard Biesheuvel wrote: > On Thu, 12 Sept 2024 at 16:29, Breno Leitao <leitao@debian.org> wrote: >> >> On Thu, Sep 12, 2024 at 03:10:43PM +0200, Ard Biesheuvel wrote: >>> On Thu, 12 Sept 2024 at 15:03, Breno Leitao <leitao@debian.org> wrote: >>>> On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: >>>>> I don't see how this could be an EFI bug, given that it does not deal >>>>> with E820 tables at all. >>>> >>>> I want to back up a little bit and make sure I am following the >>>> discussion. >>>> >>>> From what I understand from previous discussion, we have an EFI bug as >>>> the root cause of this issue. >>>> >>>> This happens because the EFI does NOT mark the EFI TPM event log memory >>>> region as reserved (EFI_RESERVED_TYPE). >>> >>> Why do you think EFI should use EFI_RESERVED_TYPE in this case? >>> >>> The EFI spec is very clear that EFI_RESERVED_TYPE really shouldn't be >>> used for anything by EFI itself. It is quite common for EFI >>> configuration tables to be passed as EfiRuntimeServicesData (SMBIOS), >>> EfiBootServicesData (ESRT) or EFiAcpiReclaim (ACPI tables). >>> >>> Reserved memory is mostly for memory that even the firmware does not >>> know what it is for, i.e., particular platform specific uses. >>> >>> In general, it is up to the OS to ensure that EFI configuration tables >>> that it cares about should be reserved in the correct way. >> >> Thanks for the explanation. >> >> So, if I understand what you meant here, the TPM event log memory range >> shouldn't be listed as a memory region in EFI memory map (as passed by >> the firmware to the OS). >> >> Hence, this is not a EFI firmware bug, but a OS/Kernel bug. >> >> Am I correct with the statements above? >> > > No, not entirely. But I also missed the face that this table is > actually created by the EFI stub in Linux, not the firmware. It is > *not* the same memory region that the TPM2 ACPI table describes, and > so what the various specs say about that is entirely irrelevant. > > The TPM event log configuration table is created by the EFI stub, > which uses the TCG2::GetEventLog () protocol method to obtain it. This > must be done by the EFI stub because these protocols will no longer be > available once the OS boots. But the data is not used by the EFI stub, > only by the OS, which is why it is passed in memory like this. > > The memory in question is allocated as EFI_LOADER_DATA, and so we are > relying on the OS to know that this memory is special, and needs to be > preserved. > > I think the solution here is to use a different memory type: > > --- a/drivers/firmware/efi/libstub/tpm.c > +++ b/drivers/firmware/efi/libstub/tpm.c > @@ -96,7 +96,7 @@ static void efi_retrieve_tcg2_eventlog(int version, > efi_physical_addr_t log_loca > } > > /* Allocate space for the logs and copy them. */ > - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, > + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, > sizeof(*log_tbl) + log_size, (void **)&log_tbl); > > if (status != EFI_SUCCESS) { > > which will be treated appropriately by the existing EFI-to-E820 > conversion logic. I have tested above diff, and it works! No memory corruption. The region comes up as ACPI data: [ 0.000000] BIOS-e820: [mem 0x000000007fb6d000-0x000000007fb7efff] ACPI data and kexec doesnt interfere with it. Thanks, Usama
On Thu, 12 Sept 2024 at 17:35, Usama Arif <usamaarif642@gmail.com> wrote: > > > > On 12/09/2024 16:21, Ard Biesheuvel wrote: > > On Thu, 12 Sept 2024 at 16:29, Breno Leitao <leitao@debian.org> wrote: > >> > >> On Thu, Sep 12, 2024 at 03:10:43PM +0200, Ard Biesheuvel wrote: > >>> On Thu, 12 Sept 2024 at 15:03, Breno Leitao <leitao@debian.org> wrote: > >>>> On Thu, Sep 12, 2024 at 12:51:57PM +0200, Ard Biesheuvel wrote: > >>>>> I don't see how this could be an EFI bug, given that it does not deal > >>>>> with E820 tables at all. > >>>> > >>>> I want to back up a little bit and make sure I am following the > >>>> discussion. > >>>> > >>>> From what I understand from previous discussion, we have an EFI bug as > >>>> the root cause of this issue. > >>>> > >>>> This happens because the EFI does NOT mark the EFI TPM event log memory > >>>> region as reserved (EFI_RESERVED_TYPE). > >>> > >>> Why do you think EFI should use EFI_RESERVED_TYPE in this case? > >>> > >>> The EFI spec is very clear that EFI_RESERVED_TYPE really shouldn't be > >>> used for anything by EFI itself. It is quite common for EFI > >>> configuration tables to be passed as EfiRuntimeServicesData (SMBIOS), > >>> EfiBootServicesData (ESRT) or EFiAcpiReclaim (ACPI tables). > >>> > >>> Reserved memory is mostly for memory that even the firmware does not > >>> know what it is for, i.e., particular platform specific uses. > >>> > >>> In general, it is up to the OS to ensure that EFI configuration tables > >>> that it cares about should be reserved in the correct way. > >> > >> Thanks for the explanation. > >> > >> So, if I understand what you meant here, the TPM event log memory range > >> shouldn't be listed as a memory region in EFI memory map (as passed by > >> the firmware to the OS). > >> > >> Hence, this is not a EFI firmware bug, but a OS/Kernel bug. > >> > >> Am I correct with the statements above? > >> > > > > No, not entirely. But I also missed the face that this table is > > actually created by the EFI stub in Linux, not the firmware. It is > > *not* the same memory region that the TPM2 ACPI table describes, and > > so what the various specs say about that is entirely irrelevant. > > > > The TPM event log configuration table is created by the EFI stub, > > which uses the TCG2::GetEventLog () protocol method to obtain it. This > > must be done by the EFI stub because these protocols will no longer be > > available once the OS boots. But the data is not used by the EFI stub, > > only by the OS, which is why it is passed in memory like this. > > > > The memory in question is allocated as EFI_LOADER_DATA, and so we are > > relying on the OS to know that this memory is special, and needs to be > > preserved. > > > > I think the solution here is to use a different memory type: > > > > --- a/drivers/firmware/efi/libstub/tpm.c > > +++ b/drivers/firmware/efi/libstub/tpm.c > > @@ -96,7 +96,7 @@ static void efi_retrieve_tcg2_eventlog(int version, > > efi_physical_addr_t log_loca > > } > > > > /* Allocate space for the logs and copy them. */ > > - status = efi_bs_call(allocate_pool, EFI_LOADER_DATA, > > + status = efi_bs_call(allocate_pool, EFI_ACPI_RECLAIM_MEMORY, > > sizeof(*log_tbl) + log_size, (void **)&log_tbl); > > > > if (status != EFI_SUCCESS) { > > > > which will be treated appropriately by the existing EFI-to-E820 > > conversion logic. > > I have tested above diff, and it works! No memory corruption. > > The region comes up as ACPI data: > [ 0.000000] BIOS-e820: [mem 0x000000007fb6d000-0x000000007fb7efff] ACPI data > > and kexec doesnt interfere with it. > Thanks, I'll take that as a tested-by
On 12/09/2024 14:10, Ard Biesheuvel wrote: > Does the below help at all? > > --- a/drivers/firmware/efi/tpm.c > +++ b/drivers/firmware/efi/tpm.c > @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > } > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > - memblock_reserve(efi.tpm_log, tbl_size); > + efi_mem_reserve(efi.tpm_log, tbl_size); > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > pr_info("TPM Final Events table not present\n"); Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap which is e820_table_firmware. arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at its end, just with e820_table_firmware instead of e820_table. i.e. efi_mem_reserve does: e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); e820__update_table(e820_table); while arch_update_firmware_area does: e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); e820__update_table(e820_table_firmware); Thanks, Usama
On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > > > > On 12/09/2024 14:10, Ard Biesheuvel wrote: > > Does the below help at all? > > > > --- a/drivers/firmware/efi/tpm.c > > +++ b/drivers/firmware/efi/tpm.c > > @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > > } > > > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > > - memblock_reserve(efi.tpm_log, tbl_size); > > + efi_mem_reserve(efi.tpm_log, tbl_size); > > > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > > pr_info("TPM Final Events table not present\n"); > > Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > which is e820_table_firmware. > > arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > its end, just with e820_table_firmware instead of e820_table. > i.e. efi_mem_reserve does: > e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > e820__update_table(e820_table); > > while arch_update_firmware_area does: > e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > e820__update_table(e820_table_firmware); > Shame. Using efi_mem_reserve() is appropriate here in any case, but I guess kexec on x86 needs to be fixed to juggle the EFI memory map, memblock table, and 3 (!) versions of the E820 table in the correct way (e820_table, e820_table_kexec and e820_table_firmware) Perhaps we can put this additional logic in x86's implementation of efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal with configuration tables produced by the firmware that may not be reserved correctly if kexec looks at e820_table_firmware[] only.
(cc Dave) Full thread here: https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: > > On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > > > > > > > > On 12/09/2024 14:10, Ard Biesheuvel wrote: > > > Does the below help at all? > > > > > > --- a/drivers/firmware/efi/tpm.c > > > +++ b/drivers/firmware/efi/tpm.c > > > @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > > > } > > > > > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > > > - memblock_reserve(efi.tpm_log, tbl_size); > > > + efi_mem_reserve(efi.tpm_log, tbl_size); > > > > > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > > > pr_info("TPM Final Events table not present\n"); > > > > Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > > which is e820_table_firmware. > > > > arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > > its end, just with e820_table_firmware instead of e820_table. > > i.e. efi_mem_reserve does: > > e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > e820__update_table(e820_table); > > > > while arch_update_firmware_area does: > > e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > e820__update_table(e820_table_firmware); > > > > Shame. > > Using efi_mem_reserve() is appropriate here in any case, but I guess > kexec on x86 needs to be fixed to juggle the EFI memory map, memblock > table, and 3 (!) versions of the E820 table in the correct way > (e820_table, e820_table_kexec and e820_table_firmware) > > Perhaps we can put this additional logic in x86's implementation of > efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal > with configuration tables produced by the firmware that may not be > reserved correctly if kexec looks at e820_table_firmware[] only.
On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: > > (cc Dave) Thanks for ccing me. > > Full thread here: > https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u > > On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > > > > > > > > > > > > On 12/09/2024 14:10, Ard Biesheuvel wrote: > > > > Does the below help at all? > > > > > > > > --- a/drivers/firmware/efi/tpm.c > > > > +++ b/drivers/firmware/efi/tpm.c > > > > @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > > > > } > > > > > > > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > > > > - memblock_reserve(efi.tpm_log, tbl_size); > > > > + efi_mem_reserve(efi.tpm_log, tbl_size); > > > > > > > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > > > > pr_info("TPM Final Events table not present\n"); > > > > > > Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > > > which is e820_table_firmware. > > > > > > arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > > > its end, just with e820_table_firmware instead of e820_table. > > > i.e. efi_mem_reserve does: > > > e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > e820__update_table(e820_table); > > > > > > while arch_update_firmware_area does: > > > e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > e820__update_table(e820_table_firmware); > > > > > > > Shame. > > > > Using efi_mem_reserve() is appropriate here in any case, but I guess > > kexec on x86 needs to be fixed to juggle the EFI memory map, memblock > > table, and 3 (!) versions of the E820 table in the correct way > > (e820_table, e820_table_kexec and e820_table_firmware) > > > > Perhaps we can put this additional logic in x86's implementation of > > efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal > > with configuration tables produced by the firmware that may not be > > reserved correctly if kexec looks at e820_table_firmware[] only. > I have not read all the conversations, let me have a look and response later. The first glance about the patch is that I think the kexec_file_load syscall (default of latest kexec-tools) will not use e820_table_firmware AFAIK. it will only use e820_table_kexec. Usama, can you confirm how you tested this? kexec -c -l will use kexec_load syscall kexec [-s] -l will use kexec_file_load syscall Thanks Dave
On Fri, 13 Sept 2024 at 18:56, Dave Young <dyoung@redhat.com> wrote: > > On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > (cc Dave) > > Thanks for ccing me. > > > > > Full thread here: > > https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u > > > > On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > > > > > > > > > > > > > > > > On 12/09/2024 14:10, Ard Biesheuvel wrote: > > > > > Does the below help at all? > > > > > > > > > > --- a/drivers/firmware/efi/tpm.c > > > > > +++ b/drivers/firmware/efi/tpm.c > > > > > @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > > > > > } > > > > > > > > > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > > > > > - memblock_reserve(efi.tpm_log, tbl_size); > > > > > + efi_mem_reserve(efi.tpm_log, tbl_size); > > > > > > > > > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > > > > > pr_info("TPM Final Events table not present\n"); > > > > > > > > Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > > > > which is e820_table_firmware. > > > > > > > > arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > > > > its end, just with e820_table_firmware instead of e820_table. > > > > i.e. efi_mem_reserve does: > > > > e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > > e820__update_table(e820_table); > > > > > > > > while arch_update_firmware_area does: > > > > e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > > e820__update_table(e820_table_firmware); > > > > > > > > > > Shame. > > > > > > Using efi_mem_reserve() is appropriate here in any case, but I guess > > > kexec on x86 needs to be fixed to juggle the EFI memory map, memblock > > > table, and 3 (!) versions of the E820 table in the correct way > > > (e820_table, e820_table_kexec and e820_table_firmware) > > > > > > Perhaps we can put this additional logic in x86's implementation of > > > efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal > > > with configuration tables produced by the firmware that may not be > > > reserved correctly if kexec looks at e820_table_firmware[] only. > > > > I have not read all the conversations, let me have a look and response later. > I'm still confused after reading the code about why this issue can still happen with a efi_mem_reserve. Usama, Breno, could any of you share the exact steps on how to reproduce this issue with a kvm guest? Thanks Daev
On Sat, 14 Sept 2024 at 08:46, Dave Young <dyoung@redhat.com> wrote: > > On Fri, 13 Sept 2024 at 18:56, Dave Young <dyoung@redhat.com> wrote: > > > > On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > (cc Dave) > > > > Thanks for ccing me. > > > > > > > > Full thread here: > > > https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u > > > > > > On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > > > On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > > > > > > > > > > > > > > > > > > > > On 12/09/2024 14:10, Ard Biesheuvel wrote: > > > > > > Does the below help at all? > > > > > > > > > > > > --- a/drivers/firmware/efi/tpm.c > > > > > > +++ b/drivers/firmware/efi/tpm.c > > > > > > @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > > > > > > } > > > > > > > > > > > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > > > > > > - memblock_reserve(efi.tpm_log, tbl_size); > > > > > > + efi_mem_reserve(efi.tpm_log, tbl_size); > > > > > > > > > > > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > > > > > > pr_info("TPM Final Events table not present\n"); > > > > > > > > > > Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > > > > > which is e820_table_firmware. > > > > > > > > > > arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > > > > > its end, just with e820_table_firmware instead of e820_table. > > > > > i.e. efi_mem_reserve does: > > > > > e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > > > e820__update_table(e820_table); > > > > > > > > > > while arch_update_firmware_area does: > > > > > e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > > > e820__update_table(e820_table_firmware); > > > > > > > > > > > > > Shame. > > > > > > > > Using efi_mem_reserve() is appropriate here in any case, but I guess > > > > kexec on x86 needs to be fixed to juggle the EFI memory map, memblock > > > > table, and 3 (!) versions of the E820 table in the correct way > > > > (e820_table, e820_table_kexec and e820_table_firmware) > > > > > > > > Perhaps we can put this additional logic in x86's implementation of > > > > efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal > > > > with configuration tables produced by the firmware that may not be > > > > reserved correctly if kexec looks at e820_table_firmware[] only. > > > > > > > I have not read all the conversations, let me have a look and response later. > > > > I'm still confused after reading the code about why this issue can > still happen with a efi_mem_reserve. > Usama, Breno, could any of you share the exact steps on how to > reproduce this issue with a kvm guest? > The code does not use efi_mem_reserve() only memblock_reserve().
On Sat, 14 Sept 2024 at 16:31, Ard Biesheuvel <ardb@kernel.org> wrote: > > On Sat, 14 Sept 2024 at 08:46, Dave Young <dyoung@redhat.com> wrote: > > > > On Fri, 13 Sept 2024 at 18:56, Dave Young <dyoung@redhat.com> wrote: > > > > > > On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > > > (cc Dave) > > > > > > Thanks for ccing me. > > > > > > > > > > > Full thread here: > > > > https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u > > > > > > > > On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > > > > > On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > > > > > > > > > > > > > > > > > > > > > > > > On 12/09/2024 14:10, Ard Biesheuvel wrote: > > > > > > > Does the below help at all? > > > > > > > > > > > > > > --- a/drivers/firmware/efi/tpm.c > > > > > > > +++ b/drivers/firmware/efi/tpm.c > > > > > > > @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > > > > > > > } > > > > > > > > > > > > > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > > > > > > > - memblock_reserve(efi.tpm_log, tbl_size); > > > > > > > + efi_mem_reserve(efi.tpm_log, tbl_size); > > > > > > > > > > > > > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > > > > > > > pr_info("TPM Final Events table not present\n"); > > > > > > > > > > > > Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > > > > > > which is e820_table_firmware. Updating e820_table should be good enough, it depends on where the corruption is happening. kexec will find a suitable memory for the kernel via searching through the system ram resources. So efi_mem_reserve will update e820_table, then reserve in the resources list as E820_TYPE_RESERVED, thus it should not be a problem. During the 2nd kernel boot phase, it is carried as EFI_LOADER_DATA with EFI_MEMORY_RUNTIME attribute, I think it is also fine, and later efi_mem_reserve will be called as what have been done in previous kernel. So I think no need to update the e820_table_kexec and e820_table_firmware > > > > > > > > > > > > arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > > > > > > its end, just with e820_table_firmware instead of e820_table. > > > > > > i.e. efi_mem_reserve does: > > > > > > e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > > > > e820__update_table(e820_table); > > > > > > > > > > > > while arch_update_firmware_area does: > > > > > > e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > > > > e820__update_table(e820_table_firmware); > > > > > > > > > > > > > > > > Shame. > > > > > > > > > > Using efi_mem_reserve() is appropriate here in any case, but I guess > > > > > kexec on x86 needs to be fixed to juggle the EFI memory map, memblock > > > > > table, and 3 (!) versions of the E820 table in the correct way > > > > > (e820_table, e820_table_kexec and e820_table_firmware) > > > > > > > > > > Perhaps we can put this additional logic in x86's implementation of > > > > > efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal > > > > > with configuration tables produced by the firmware that may not be > > > > > reserved correctly if kexec looks at e820_table_firmware[] only. > > > > > > > > > > I have not read all the conversations, let me have a look and response later. > > > > > > > I'm still confused after reading the code about why this issue can > > still happen with a efi_mem_reserve. > > Usama, Breno, could any of you share the exact steps on how to > > reproduce this issue with a kvm guest? > > > > The code does not use efi_mem_reserve() only memblock_reserve(). Yes, I see this, I just thought that Usama tested with changes to efi_mem_reserve and it still did not work, this is what I'm confused about. But maybe Usama did not test and only checked the code and assumed that we have to update the e820_table_kexec and e820_table_firmware. See my reply inline above. Thanks Dave >
On Sat, 14 Sept 2024 at 17:24, Dave Young <dyoung@redhat.com> wrote: > > On Sat, 14 Sept 2024 at 16:31, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > On Sat, 14 Sept 2024 at 08:46, Dave Young <dyoung@redhat.com> wrote: > > > > > > On Fri, 13 Sept 2024 at 18:56, Dave Young <dyoung@redhat.com> wrote: > > > > > > > > On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > > > > > (cc Dave) > > > > > > > > Thanks for ccing me. > > > > > > > > > > > > > > Full thread here: > > > > > https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u > > > > > > > > > > On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: > > > > > > > > > > > > On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > > > > > > > > > > > > > > > > > > > > > > > > > > > > On 12/09/2024 14:10, Ard Biesheuvel wrote: > > > > > > > > Does the below help at all? > > > > > > > > > > > > > > > > --- a/drivers/firmware/efi/tpm.c > > > > > > > > +++ b/drivers/firmware/efi/tpm.c > > > > > > > > @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > > > > > > > > } > > > > > > > > > > > > > > > > tbl_size = sizeof(*log_tbl) + log_tbl->size; > > > > > > > > - memblock_reserve(efi.tpm_log, tbl_size); > > > > > > > > + efi_mem_reserve(efi.tpm_log, tbl_size); > > > > > > > > > > > > > > > > if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > > > > > > > > pr_info("TPM Final Events table not present\n"); > > > > > > > > > > > > > > Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > > > > > > > which is e820_table_firmware. > > Updating e820_table should be good enough, it depends on where the > corruption is happening. > > kexec will find a suitable memory for the kernel via searching through > the system ram resources. So efi_mem_reserve will update e820_table, > then reserve in the resources list as E820_TYPE_RESERVED, thus it > should not be a problem. > During the 2nd kernel boot phase, it is carried as EFI_LOADER_DATA > with EFI_MEMORY_RUNTIME attribute, I think it is also fine, and later > efi_mem_reserve will be called as what have been done in previous > kernel. > > So I think no need to update the e820_table_kexec and e820_table_firmware Hmm, oops, I again forgot the kexec_load code in userspace kexec-tools. The kexec-tools code still searching for memory ranges from e820_table_firmware > > > > > > > > > > > > > > > arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > > > > > > > its end, just with e820_table_firmware instead of e820_table. > > > > > > > i.e. efi_mem_reserve does: > > > > > > > e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > > > > > e820__update_table(e820_table); > > > > > > > > > > > > > > while arch_update_firmware_area does: > > > > > > > e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > > > > > > e820__update_table(e820_table_firmware); > > > > > > > > > > > > > > > > > > > Shame. > > > > > > > > > > > > Using efi_mem_reserve() is appropriate here in any case, but I guess > > > > > > kexec on x86 needs to be fixed to juggle the EFI memory map, memblock > > > > > > table, and 3 (!) versions of the E820 table in the correct way > > > > > > (e820_table, e820_table_kexec and e820_table_firmware) > > > > > > > > > > > > Perhaps we can put this additional logic in x86's implementation of > > > > > > efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal > > > > > > with configuration tables produced by the firmware that may not be > > > > > > reserved correctly if kexec looks at e820_table_firmware[] only. > > > > > > > > > > > > > I have not read all the conversations, let me have a look and response later. > > > > > > > > > > I'm still confused after reading the code about why this issue can > > > still happen with a efi_mem_reserve. > > > Usama, Breno, could any of you share the exact steps on how to > > > reproduce this issue with a kvm guest? > > > > > > > The code does not use efi_mem_reserve() only memblock_reserve(). > > Yes, I see this, I just thought that Usama tested with changes to > efi_mem_reserve and it still did not work, this is what I'm confused > about. > > But maybe Usama did not test and only checked the code and assumed > that we have to update the e820_table_kexec and e820_table_firmware. > See my reply inline above. Please ignore the above comment. The userspace code does need the e820_table_firmware. So the best way to make it easier is to clean up the e820 tables and maintain only one table then the kernel kexec_file_load behavior will be the same as the userspace. But need a closer look about the details, eg. if the hibernate (mentioned in code comment) is happy. Or to change userspace to go through the /proc/iomem instead of checking the /sys/firmware/memmap > > Thanks > Dave > >
On 13/09/2024 11:56, Dave Young wrote: > On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: >> >> (cc Dave) > > Thanks for ccing me. > >> >> Full thread here: >> https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u >> >> On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: >>> >>> On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: >>>> >>>> >>>> >>>> On 12/09/2024 14:10, Ard Biesheuvel wrote: >>>>> Does the below help at all? >>>>> >>>>> --- a/drivers/firmware/efi/tpm.c >>>>> +++ b/drivers/firmware/efi/tpm.c >>>>> @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) >>>>> } >>>>> >>>>> tbl_size = sizeof(*log_tbl) + log_tbl->size; >>>>> - memblock_reserve(efi.tpm_log, tbl_size); >>>>> + efi_mem_reserve(efi.tpm_log, tbl_size); >>>>> >>>>> if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { >>>>> pr_info("TPM Final Events table not present\n"); >>>> >>>> Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap >>>> which is e820_table_firmware. >>>> >>>> arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at >>>> its end, just with e820_table_firmware instead of e820_table. >>>> i.e. efi_mem_reserve does: >>>> e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); >>>> e820__update_table(e820_table); >>>> >>>> while arch_update_firmware_area does: >>>> e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); >>>> e820__update_table(e820_table_firmware); >>>> >>> >>> Shame. >>> >>> Using efi_mem_reserve() is appropriate here in any case, but I guess >>> kexec on x86 needs to be fixed to juggle the EFI memory map, memblock >>> table, and 3 (!) versions of the E820 table in the correct way >>> (e820_table, e820_table_kexec and e820_table_firmware) >>> >>> Perhaps we can put this additional logic in x86's implementation of >>> efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal >>> with configuration tables produced by the firmware that may not be >>> reserved correctly if kexec looks at e820_table_firmware[] only. >> > > I have not read all the conversations, let me have a look and response later. > > The first glance about the patch is that I think the kexec_file_load > syscall (default of latest kexec-tools) will not use > e820_table_firmware AFAIK. it will only use e820_table_kexec. I initially thought that as well. But it looks like kexec just reads /sys/firmware/memmap https://github.com/horms/kexec-tools/blob/main/kexec/firmware_memmap.h#L29 which is e820_table_firmware. The patch that Ard sent in https://lore.kernel.org/all/20240912155159.1951792-2-ardb+git@google.com/ is the right approach to it I believe, and I dont see the issue anymore after applying that patch. > > Usama, can you confirm how you tested this? > kexec -c -l will use kexec_load syscall I am currently testing in my VM setup with kexec_load. But production is running kexec_file_load and has the same issue. Thanks, Usama > kexec [-s] -l will use kexec_file_load syscall > > Thanks > Dave >
On Fri, 13 Sept 2024 at 19:07, Usama Arif <usamaarif642@gmail.com> wrote: > > > > On 13/09/2024 11:56, Dave Young wrote: > > On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: > >> > >> (cc Dave) > > > > Thanks for ccing me. > > > >> > >> Full thread here: > >> https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u > >> > >> On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: > >>> > >>> On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > >>>> > >>>> > >>>> > >>>> On 12/09/2024 14:10, Ard Biesheuvel wrote: > >>>>> Does the below help at all? > >>>>> > >>>>> --- a/drivers/firmware/efi/tpm.c > >>>>> +++ b/drivers/firmware/efi/tpm.c > >>>>> @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > >>>>> } > >>>>> > >>>>> tbl_size = sizeof(*log_tbl) + log_tbl->size; > >>>>> - memblock_reserve(efi.tpm_log, tbl_size); > >>>>> + efi_mem_reserve(efi.tpm_log, tbl_size); > >>>>> > >>>>> if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > >>>>> pr_info("TPM Final Events table not present\n"); > >>>> > >>>> Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > >>>> which is e820_table_firmware. > >>>> > >>>> arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > >>>> its end, just with e820_table_firmware instead of e820_table. > >>>> i.e. efi_mem_reserve does: > >>>> e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > >>>> e820__update_table(e820_table); > >>>> > >>>> while arch_update_firmware_area does: > >>>> e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > >>>> e820__update_table(e820_table_firmware); > >>>> > >>> > >>> Shame. > >>> > >>> Using efi_mem_reserve() is appropriate here in any case, but I guess > >>> kexec on x86 needs to be fixed to juggle the EFI memory map, memblock > >>> table, and 3 (!) versions of the E820 table in the correct way > >>> (e820_table, e820_table_kexec and e820_table_firmware) > >>> > >>> Perhaps we can put this additional logic in x86's implementation of > >>> efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal > >>> with configuration tables produced by the firmware that may not be > >>> reserved correctly if kexec looks at e820_table_firmware[] only. > >> > > > > I have not read all the conversations, let me have a look and response later. > > > > The first glance about the patch is that I think the kexec_file_load > > syscall (default of latest kexec-tools) will not use > > e820_table_firmware AFAIK. it will only use e820_table_kexec. > > I initially thought that as well. But it looks like kexec just reads /sys/firmware/memmap > > https://github.com/horms/kexec-tools/blob/main/kexec/firmware_memmap.h#L29 > > which is e820_table_firmware. That piece of code is only used by kexec_load > > The patch that Ard sent in https://lore.kernel.org/all/20240912155159.1951792-2-ardb+git@google.com/ > is the right approach to it I believe, and I dont see the issue anymore after applying that patch. > > > > > Usama, can you confirm how you tested this? > > kexec -c -l will use kexec_load syscall > > I am currently testing in my VM setup with kexec_load. But production is running > kexec_file_load and has the same issue. Ok, I mean efi_mem_reserve should be able to work if you retest with kexec_file_load. > > Thanks, > Usama > > > kexec [-s] -l will use kexec_file_load syscall > > > > Thanks > > Dave > > >
On Fri, 13 Sept 2024 at 19:13, Dave Young <dyoung@redhat.com> wrote: > > On Fri, 13 Sept 2024 at 19:07, Usama Arif <usamaarif642@gmail.com> wrote: > > > > > > > > On 13/09/2024 11:56, Dave Young wrote: > > > On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: > > >> > > >> (cc Dave) > > > > > > Thanks for ccing me. > > > > > >> > > >> Full thread here: > > >> https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u > > >> > > >> On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: > > >>> > > >>> On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: > > >>>> > > >>>> > > >>>> > > >>>> On 12/09/2024 14:10, Ard Biesheuvel wrote: > > >>>>> Does the below help at all? > > >>>>> > > >>>>> --- a/drivers/firmware/efi/tpm.c > > >>>>> +++ b/drivers/firmware/efi/tpm.c > > >>>>> @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) > > >>>>> } > > >>>>> > > >>>>> tbl_size = sizeof(*log_tbl) + log_tbl->size; > > >>>>> - memblock_reserve(efi.tpm_log, tbl_size); > > >>>>> + efi_mem_reserve(efi.tpm_log, tbl_size); > > >>>>> > > >>>>> if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { > > >>>>> pr_info("TPM Final Events table not present\n"); > > >>>> > > >>>> Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap > > >>>> which is e820_table_firmware. > > >>>> > > >>>> arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at > > >>>> its end, just with e820_table_firmware instead of e820_table. > > >>>> i.e. efi_mem_reserve does: > > >>>> e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > >>>> e820__update_table(e820_table); > > >>>> > > >>>> while arch_update_firmware_area does: > > >>>> e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); > > >>>> e820__update_table(e820_table_firmware); > > >>>> > > >>> > > >>> Shame. > > >>> > > >>> Using efi_mem_reserve() is appropriate here in any case, but I guess > > >>> kexec on x86 needs to be fixed to juggle the EFI memory map, memblock > > >>> table, and 3 (!) versions of the E820 table in the correct way > > >>> (e820_table, e820_table_kexec and e820_table_firmware) > > >>> > > >>> Perhaps we can put this additional logic in x86's implementation of > > >>> efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal > > >>> with configuration tables produced by the firmware that may not be > > >>> reserved correctly if kexec looks at e820_table_firmware[] only. > > >> > > > > > > I have not read all the conversations, let me have a look and response later. > > > > > > The first glance about the patch is that I think the kexec_file_load > > > syscall (default of latest kexec-tools) will not use > > > e820_table_firmware AFAIK. it will only use e820_table_kexec. > > > > I initially thought that as well. But it looks like kexec just reads /sys/firmware/memmap > > > > https://github.com/horms/kexec-tools/blob/main/kexec/firmware_memmap.h#L29 > > > > which is e820_table_firmware. > > That piece of code is only used by kexec_load > > > > > The patch that Ard sent in https://lore.kernel.org/all/20240912155159.1951792-2-ardb+git@google.com/ > > is the right approach to it I believe, and I dont see the issue anymore after applying that patch. > > > > > > > > Usama, can you confirm how you tested this? > > > kexec -c -l will use kexec_load syscall > > > > I am currently testing in my VM setup with kexec_load. But production is running > > kexec_file_load and has the same issue. > > Ok, I mean efi_mem_reserve should be able to work if you retest with > kexec_file_load. Hold on, I'm not sure about above :( checking the efi_arch_mem_reserve(), currently it updates the e820 table, if you update the e820_table_kexec and e820_table_firmware then I think both kexec_load and kexec_file_load will work. Anyway I was not aware very much about the firmware e820 tables and kexec tables when they were created. I suspect that a cleanup and revisit is needed. I will have a look at that. For Ard's fix to allocate it as ACPI memory, I think it should be good and simpler. > > > > > Thanks, > > Usama > > > > > kexec [-s] -l will use kexec_file_load syscall > > > > > > Thanks > > > Dave > > > > >
On 13/09/2024 12:49, Dave Young wrote: > On Fri, 13 Sept 2024 at 19:13, Dave Young <dyoung@redhat.com> wrote: >> >> On Fri, 13 Sept 2024 at 19:07, Usama Arif <usamaarif642@gmail.com> wrote: >>> >>> >>> >>> On 13/09/2024 11:56, Dave Young wrote: >>>> On Thu, 12 Sept 2024 at 22:15, Ard Biesheuvel <ardb@kernel.org> wrote: >>>>> >>>>> (cc Dave) >>>> >>>> Thanks for ccing me. >>>> >>>>> >>>>> Full thread here: >>>>> https://lore.kernel.org/all/CAMj1kXG1hbiafKRyC5qM1Vj5X7x-dmLndqqo2AYnHMRxDz-80w@mail.gmail.com/T/#u >>>>> >>>>> On Thu, 12 Sept 2024 at 16:05, Ard Biesheuvel <ardb@kernel.org> wrote: >>>>>> >>>>>> On Thu, 12 Sept 2024 at 15:55, Usama Arif <usamaarif642@gmail.com> wrote: >>>>>>> >>>>>>> >>>>>>> >>>>>>> On 12/09/2024 14:10, Ard Biesheuvel wrote: >>>>>>>> Does the below help at all? >>>>>>>> >>>>>>>> --- a/drivers/firmware/efi/tpm.c >>>>>>>> +++ b/drivers/firmware/efi/tpm.c >>>>>>>> @@ -60,7 +60,7 @@ int __init efi_tpm_eventlog_init(void) >>>>>>>> } >>>>>>>> >>>>>>>> tbl_size = sizeof(*log_tbl) + log_tbl->size; >>>>>>>> - memblock_reserve(efi.tpm_log, tbl_size); >>>>>>>> + efi_mem_reserve(efi.tpm_log, tbl_size); >>>>>>>> >>>>>>>> if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { >>>>>>>> pr_info("TPM Final Events table not present\n"); >>>>>>> >>>>>>> Unfortunately not. efi_mem_reserve updates e820_table, while kexec looks at /sys/firmware/memmap >>>>>>> which is e820_table_firmware. >>>>>>> >>>>>>> arch_update_firmware_area introduced in the RFC patch does the same thing as efi_mem_reserve does at >>>>>>> its end, just with e820_table_firmware instead of e820_table. >>>>>>> i.e. efi_mem_reserve does: >>>>>>> e820__range_update(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); >>>>>>> e820__update_table(e820_table); >>>>>>> >>>>>>> while arch_update_firmware_area does: >>>>>>> e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); >>>>>>> e820__update_table(e820_table_firmware); >>>>>>> >>>>>> >>>>>> Shame. >>>>>> >>>>>> Using efi_mem_reserve() is appropriate here in any case, but I guess >>>>>> kexec on x86 needs to be fixed to juggle the EFI memory map, memblock >>>>>> table, and 3 (!) versions of the E820 table in the correct way >>>>>> (e820_table, e820_table_kexec and e820_table_firmware) >>>>>> >>>>>> Perhaps we can put this additional logic in x86's implementation of >>>>>> efi_arch_mem_reserve()? AFAICT, all callers of efi_mem_reserve() deal >>>>>> with configuration tables produced by the firmware that may not be >>>>>> reserved correctly if kexec looks at e820_table_firmware[] only. >>>>> >>>> >>>> I have not read all the conversations, let me have a look and response later. >>>> >>>> The first glance about the patch is that I think the kexec_file_load >>>> syscall (default of latest kexec-tools) will not use >>>> e820_table_firmware AFAIK. it will only use e820_table_kexec. >>> >>> I initially thought that as well. But it looks like kexec just reads /sys/firmware/memmap >>> >>> https://github.com/horms/kexec-tools/blob/main/kexec/firmware_memmap.h#L29 >>> >>> which is e820_table_firmware. >> >> That piece of code is only used by kexec_load >> >>> >>> The patch that Ard sent in https://lore.kernel.org/all/20240912155159.1951792-2-ardb+git@google.com/ >>> is the right approach to it I believe, and I dont see the issue anymore after applying that patch. >>> >>>> >>>> Usama, can you confirm how you tested this? >>>> kexec -c -l will use kexec_load syscall >>> >>> I am currently testing in my VM setup with kexec_load. But production is running >>> kexec_file_load and has the same issue. >> >> Ok, I mean efi_mem_reserve should be able to work if you retest with >> kexec_file_load. > > Hold on, I'm not sure about above :( > > checking the efi_arch_mem_reserve(), currently it updates the e820 > table, if you update the e820_table_kexec and e820_table_firmware then > I think both kexec_load and kexec_file_load will work. > > Anyway I was not aware very much about the firmware e820 tables and > kexec tables when they were created. I suspect that a cleanup and > revisit is needed. I will have a look at that. Yes, I feel like there is one too many tables! From reading the code I understand that /sys/firmware/memmap should contain the untouched map at time of boot, i.e. e820_table_firmware. But I would be in favour of getting rid of e820_table_firmware, and just having e820_table_kexec. And /sys/firmware/memmap gets data from e820_table_kexec. > > For Ard's fix to allocate it as ACPI memory, I think it should be good > and simpler. > Agreed! >> >>> >>> Thanks, >>> Usama >>> >>>> kexec [-s] -l will use kexec_file_load syscall >>>> >>>> Thanks >>>> Dave >>>> >>> >
Hi Usama, > > Anyway I was not aware very much about the firmware e820 tables and > > kexec tables when they were created. I suspect that a cleanup and > > revisit is needed. I will have a look at that. > > Yes, I feel like there is one too many tables! From reading the code > I understand that /sys/firmware/memmap should contain the untouched map > at time of boot, i.e. e820_table_firmware. But I would be in favour of > getting rid of e820_table_firmware, and just having e820_table_kexec. > And /sys/firmware/memmap gets data from e820_table_kexec. Agreed, I have the same feelings. Thanks Dave
On 12/09/2024 11:51, Ard Biesheuvel wrote: > On Thu, 12 Sept 2024 at 12:23, Usama Arif <usamaarif642@gmail.com> wrote: >> >> >> >> On 11/09/2024 12:51, Ard Biesheuvel wrote: >>> On Wed, 11 Sept 2024 at 12:41, Usama Arif <usamaarif642@gmail.com> wrote: >>>> >>>> Looking at the TPM spec [1] >>>> >>>> If the ACPI TPM2 table contains the address and size of the Platform >>>> Firmware TCG log, firmware “pins” the memory associated with the >>>> Platform FirmwareTCG log, and reports this memory as “Reserved” memory >>>> via the INT 15h/E820 interface. >>>> >>>> It looks like the firmware should pass this as reserved in e820 memory >>>> map. However, it doesn't seem to. The firmware being tested on is: >>>> dmidecode -s bios-version >>>> edk2-20240214-2.el9 >>>> >>>> When this area is not reserved, it comes up as usable in >>>> /sys/firmware/memmap. This means that kexec, which uses that memmap >>>> to find usable memory regions, can select the region where efi.tpm_log >>>> is and overwrite it and relocate_kernel. >>>> >>>> Having a fix in firmware can be difficult to get through. As a secondary >>>> fix, this patch marks that region as reserved in e820_table_firmware if it >>>> is currently E820_TYPE_RAM so that kexec doesn't use it for kernel segments. >>>> >>>> [1] https://trustedcomputinggroup.org/wp-content/uploads/PC-ClientPlatform_Profile_for_TPM_2p0_Systems_v49_161114_public-review.pdf >>>> >>>> Signed-off-by: Usama Arif <usamaarif642@gmail.com> >>> >>> I would expect the EFI memory map to E820 conversion implemented in >>> the EFI stub to take care of this. >>> >> >> So I have been making a prototype with EFI stub, and the unfinished version is looking like a >> horrible hack. >> >> The only way to do this in libstub is to pass log_tbl all the way from efi_retrieve_tcg2_eventlog >> to efi_stub_entry and from there to setup_e820. >> While going through the efi memory map and converting it to e820 table in setup_e820, you have to check >> if log_tbl falls in any of the ranges and if the range is E820_TYPE_RAM. If that condition is satisfied, >> then you have to split that range into 3. i.e. the E820_TYPE_RAM range before tpm_log, the tpm_log >> E820_TYPE_RESERVED range, and the E820_TYPE_RAM range after. There are no helper functions, so this >> splitting involves playing with a lot of pointers, and it looks quite ugly. I believe doing this >> way is more likely to introduce bugs. >> >> If we are having to compensate for an EFI bug, would it make sense to do it in the way done >> in RFC and do it in kernel rather than libstub? It is simple and very likely to be bug free. >> > > I don't see how this could be an EFI bug, given that it does not deal > with E820 tables at all. EFI passes memory descriptors to libstub, libstub converts it to e820. I believe the right behaviour should be that EFI creates an EFI_RESERVED_TYPE region for that TPM log memory. Then libstub would automatically convert that EFI_RESERVED_TYPE to E820_TYPE_RESERVED in setup_e820 [1]. [1] https://elixir.bootlin.com/linux/v6.10.9/source/drivers/firmware/efi/libstub/x86-stub.c#L573
On 11/09/2024 12:51, Ard Biesheuvel wrote: > On Wed, 11 Sept 2024 at 12:41, Usama Arif <usamaarif642@gmail.com> wrote: >> >> Looking at the TPM spec [1] >> >> If the ACPI TPM2 table contains the address and size of the Platform >> Firmware TCG log, firmware “pins” the memory associated with the >> Platform FirmwareTCG log, and reports this memory as “Reserved” memory >> via the INT 15h/E820 interface. >> >> It looks like the firmware should pass this as reserved in e820 memory >> map. However, it doesn't seem to. The firmware being tested on is: >> dmidecode -s bios-version >> edk2-20240214-2.el9 >> >> When this area is not reserved, it comes up as usable in >> /sys/firmware/memmap. This means that kexec, which uses that memmap >> to find usable memory regions, can select the region where efi.tpm_log >> is and overwrite it and relocate_kernel. >> >> Having a fix in firmware can be difficult to get through. As a secondary >> fix, this patch marks that region as reserved in e820_table_firmware if it >> is currently E820_TYPE_RAM so that kexec doesn't use it for kernel segments. >> >> [1] https://trustedcomputinggroup.org/wp-content/uploads/PC-ClientPlatform_Profile_for_TPM_2p0_Systems_v49_161114_public-review.pdf >> >> Signed-off-by: Usama Arif <usamaarif642@gmail.com> Forgot to add: Reported-by: Breno Leitao <leitao@debian.org> > > I would expect the EFI memory map to E820 conversion implemented in > the EFI stub to take care of this. > > If you are not booting via the EFI stub, the bootloader is performing > this conversion, and so it should be done there instead. > I will look into this and report back. Thanks > >> --- >> arch/x86/include/asm/e820/api.h | 2 ++ >> arch/x86/kernel/e820.c | 6 ++++++ >> arch/x86/platform/efi/efi.c | 9 +++++++++ >> drivers/firmware/efi/tpm.c | 2 +- >> include/linux/efi.h | 7 +++++++ >> 5 files changed, 25 insertions(+), 1 deletion(-) >> >> diff --git a/arch/x86/include/asm/e820/api.h b/arch/x86/include/asm/e820/api.h >> index 2e74a7f0e935..4e9aa24f03bd 100644 >> --- a/arch/x86/include/asm/e820/api.h >> +++ b/arch/x86/include/asm/e820/api.h >> @@ -16,6 +16,8 @@ extern bool e820__mapped_all(u64 start, u64 end, enum e820_type type); >> >> extern void e820__range_add (u64 start, u64 size, enum e820_type type); >> extern u64 e820__range_update(u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); >> +extern u64 e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, >> + enum e820_type new_type); >> extern u64 e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type); >> extern u64 e820__range_update_table(struct e820_table *t, u64 start, u64 size, enum e820_type old_type, enum e820_type new_type); >> >> diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c >> index 4893d30ce438..912400161623 100644 >> --- a/arch/x86/kernel/e820.c >> +++ b/arch/x86/kernel/e820.c >> @@ -538,6 +538,12 @@ u64 __init e820__range_update_table(struct e820_table *t, u64 start, u64 size, >> return __e820__range_update(t, start, size, old_type, new_type); >> } >> >> +u64 __init e820__range_update_firmware(u64 start, u64 size, enum e820_type old_type, >> + enum e820_type new_type) >> +{ >> + return __e820__range_update(e820_table_firmware, start, size, old_type, new_type); >> +} >> + >> /* Remove a range of memory from the E820 table: */ >> u64 __init e820__range_remove(u64 start, u64 size, enum e820_type old_type, bool check_type) >> { >> diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c >> index 88a96816de9a..aa95f77d7a30 100644 >> --- a/arch/x86/platform/efi/efi.c >> +++ b/arch/x86/platform/efi/efi.c >> @@ -171,6 +171,15 @@ static void __init do_add_efi_memmap(void) >> e820__update_table(e820_table); >> } >> >> +/* Reserve firmware area if it was marked as RAM */ >> +void arch_update_firmware_area(u64 addr, u64 size) >> +{ >> + if (e820__get_entry_type(addr, addr + size) == E820_TYPE_RAM) { >> + e820__range_update_firmware(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); >> + e820__update_table(e820_table_firmware); >> + } >> +} >> + >> /* >> * Given add_efi_memmap defaults to 0 and there is no alternative >> * e820 mechanism for soft-reserved memory, import the full EFI memory >> diff --git a/drivers/firmware/efi/tpm.c b/drivers/firmware/efi/tpm.c >> index e8d69bd548f3..8e6e7131d718 100644 >> --- a/drivers/firmware/efi/tpm.c >> +++ b/drivers/firmware/efi/tpm.c >> @@ -60,6 +60,7 @@ int __init efi_tpm_eventlog_init(void) >> } >> >> tbl_size = sizeof(*log_tbl) + log_tbl->size; >> + arch_update_firmware_area(efi.tpm_log, tbl_size); >> memblock_reserve(efi.tpm_log, tbl_size); >> >> if (efi.tpm_final_log == EFI_INVALID_TABLE_ADDR) { >> @@ -107,4 +108,3 @@ int __init efi_tpm_eventlog_init(void) >> early_memunmap(log_tbl, sizeof(*log_tbl)); >> return ret; >> } >> - >> diff --git a/include/linux/efi.h b/include/linux/efi.h >> index 6bf3c4fe8511..9c239cdff771 100644 >> --- a/include/linux/efi.h >> +++ b/include/linux/efi.h >> @@ -1371,4 +1371,11 @@ extern struct blocking_notifier_head efivar_ops_nh; >> void efivars_generic_ops_register(void); >> void efivars_generic_ops_unregister(void); >> >> +#ifdef CONFIG_X86_64 >> +void __init arch_update_firmware_area(u64 addr, u64 size); >> +#else >> +static inline void __init arch_update_firmware_area(u64 addr, u64 size) >> +{ >> +} >> +#endif >> #endif /* _LINUX_EFI_H */ >> -- >> 2.43.5 >>
© 2016 - 2024 Red Hat, Inc.