From: William Roche <william.roche@oracle.com>
In case of a large page impacted by a memory error, enhance
the existing QEMU error message which indicates that the error
is injected in the VM, adding "on lost large page SIZE@ADDR".
Also include a similar message on the ARM platform.
In the case of an impacted large page, we now report:
...Memory Error at QEMU addr X and GUEST addr Y on lost large page SIZE@ADDR of type...
Signed-off-by: William Roche <william.roche@oracle.com>
---
accel/kvm/kvm-all.c | 4 ----
target/arm/kvm.c | 13 +++++++++++++
target/i386/kvm/kvm.c | 18 ++++++++++++++----
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 4f2abd5774..f89568bfa3 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1296,10 +1296,6 @@ static void kvm_unpoison_all(void *param)
void kvm_hwpoison_page_add(ram_addr_t ram_addr)
{
HWPoisonPage *page;
- size_t page_size = qemu_ram_pagesize_from_addr(ram_addr);
-
- if (page_size > TARGET_PAGE_SIZE)
- ram_addr = QEMU_ALIGN_DOWN(ram_addr, page_size);
QLIST_FOREACH(page, &hwpoison_page_list, list) {
if (page->ram_addr == ram_addr) {
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index a9444a2c7a..323ce0045d 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2366,6 +2366,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
ram_addr_t ram_addr;
hwaddr paddr;
+ size_t page_size;
+ char lp_msg[54];
assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
@@ -2373,6 +2375,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr != RAM_ADDR_INVALID &&
kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+ page_size = qemu_ram_pagesize_from_addr(ram_addr);
+ if (page_size > TARGET_PAGE_SIZE) {
+ ram_addr = ROUND_DOWN(ram_addr, page_size);
+ snprintf(lp_msg, sizeof(lp_msg), " on lost large page "
+ RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr);
+ } else {
+ lp_msg[0] = '\0';
+ }
kvm_hwpoison_page_add(ram_addr);
/*
* If this is a BUS_MCEERR_AR, we know we have been called
@@ -2389,6 +2399,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
kvm_cpu_synchronize_state(c);
if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
kvm_inject_arm_sea(c);
+ error_report("Guest Memory Error at QEMU addr %p and "
+ "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+ addr, paddr, lp_msg, "BUS_MCEERR_AR");
} else {
error_report("failed to record the error");
abort();
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 2f66e63b88..7715cab7cf 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -741,6 +741,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
CPUX86State *env = &cpu->env;
ram_addr_t ram_addr;
hwaddr paddr;
+ size_t page_size;
+ char lp_msg[54];
/* If we get an action required MCE, it has been injected by KVM
* while the VM was running. An action optional MCE instead should
@@ -753,6 +755,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr != RAM_ADDR_INVALID &&
kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+ page_size = qemu_ram_pagesize_from_addr(ram_addr);
+ if (page_size > TARGET_PAGE_SIZE) {
+ ram_addr = ROUND_DOWN(ram_addr, page_size);
+ snprintf(lp_msg, sizeof(lp_msg), " on lost large page "
+ RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr);
+ } else {
+ lp_msg[0] = '\0';
+ }
kvm_hwpoison_page_add(ram_addr);
kvm_mce_inject(cpu, paddr, code);
@@ -763,12 +773,12 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
*/
if (code == BUS_MCEERR_AR) {
error_report("Guest MCE Memory Error at QEMU addr %p and "
- "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
- addr, paddr, "BUS_MCEERR_AR");
+ "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+ addr, paddr, lp_msg, "BUS_MCEERR_AR");
} else {
warn_report("Guest MCE Memory Error at QEMU addr %p and "
- "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
- addr, paddr, "BUS_MCEERR_AO");
+ "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+ addr, paddr, lp_msg, "BUS_MCEERR_AO");
}
return;
--
2.43.5
On 10.01.25 22:14, “William Roche wrote: > From: William Roche <william.roche@oracle.com> > > In case of a large page impacted by a memory error, enhance > the existing Qemu error message which indicates that the error > is injected in the VM, adding "on lost large page SIZE@ADDR". > > Include also a similar message to the ARM platform. > > In the case of a large page impacted, we now report: > ...Memory Error at QEMU addr X and GUEST addr Y on lost large page SIZE@ADDR of type... > > Signed-off-by: William Roche <william.roche@oracle.com> > --- > accel/kvm/kvm-all.c | 4 ---- > target/arm/kvm.c | 13 +++++++++++++ > target/i386/kvm/kvm.c | 18 ++++++++++++++---- > 3 files changed, 27 insertions(+), 8 deletions(-) > > diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c > index 4f2abd5774..f89568bfa3 100644 > --- a/accel/kvm/kvm-all.c > +++ b/accel/kvm/kvm-all.c > @@ -1296,10 +1296,6 @@ static void kvm_unpoison_all(void *param) > void kvm_hwpoison_page_add(ram_addr_t ram_addr) > { > HWPoisonPage *page; > - size_t page_size = qemu_ram_pagesize_from_addr(ram_addr); > - > - if (page_size > TARGET_PAGE_SIZE) > - ram_addr = QEMU_ALIGN_DOWN(ram_addr, page_size); > > QLIST_FOREACH(page, &hwpoison_page_list, list) { > if (page->ram_addr == ram_addr) { > diff --git a/target/arm/kvm.c b/target/arm/kvm.c > index a9444a2c7a..323ce0045d 100644 > --- a/target/arm/kvm.c > +++ b/target/arm/kvm.c > @@ -2366,6 +2366,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) > { > ram_addr_t ram_addr; > hwaddr paddr; > + size_t page_size; > + char lp_msg[54]; > > assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); > > @@ -2373,6 +2375,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) > ram_addr = qemu_ram_addr_from_host(addr); > if (ram_addr != RAM_ADDR_INVALID && > kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { > + page_size = qemu_ram_pagesize_from_addr(ram_addr); > + if (page_size > TARGET_PAGE_SIZE) { > + ram_addr = 
ROUND_DOWN(ram_addr, page_size); > + snprintf(lp_msg, sizeof(lp_msg), " on lost large page " > + RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr); > + } else { > + lp_msg[0] = '\0'; > + } > kvm_hwpoison_page_add(ram_addr); > /* > * If this is a BUS_MCEERR_AR, we know we have been called > @@ -2389,6 +2399,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) > kvm_cpu_synchronize_state(c); > if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { > kvm_inject_arm_sea(c); > + error_report("Guest Memory Error at QEMU addr %p and " > + "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected", > + addr, paddr, lp_msg, "BUS_MCEERR_AR"); > } else { > error_report("failed to record the error"); > abort(); > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c > index 2f66e63b88..7715cab7cf 100644 > --- a/target/i386/kvm/kvm.c > +++ b/target/i386/kvm/kvm.c > @@ -741,6 +741,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) > CPUX86State *env = &cpu->env; > ram_addr_t ram_addr; > hwaddr paddr; > + size_t page_size; > + char lp_msg[54]; > > /* If we get an action required MCE, it has been injected by KVM > * while the VM was running. An action optional MCE instead should > @@ -753,6 +755,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) > ram_addr = qemu_ram_addr_from_host(addr); > if (ram_addr != RAM_ADDR_INVALID && > kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { > + page_size = qemu_ram_pagesize_from_addr(ram_addr); > + if (page_size > TARGET_PAGE_SIZE) { > + ram_addr = ROUND_DOWN(ram_addr, page_size); As raised, aligning ram_addr_t addresses to page_size is wrong. Maybe we really want to print block->idstr, offset, size like I proposed at the other place, here as well? -- Cheers, David / dhildenb
On 1/14/25 15:09, David Hildenbrand wrote: > On 10.01.25 22:14, “William Roche wrote: >> From: William Roche <william.roche@oracle.com> >> >> In case of a large page impacted by a memory error, enhance >> the existing Qemu error message which indicates that the error >> is injected in the VM, adding "on lost large page SIZE@ADDR". >> >> Include also a similar message to the ARM platform. >> >> [...] >> diff --git a/target/arm/kvm.c b/target/arm/kvm.c >> index a9444a2c7a..323ce0045d 100644 >> --- a/target/arm/kvm.c >> +++ b/target/arm/kvm.c >> @@ -2366,6 +2366,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) >> { >> ram_addr_t ram_addr; >> hwaddr paddr; >> + size_t page_size; >> + char lp_msg[54]; >> assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); >> @@ -2373,6 +2375,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) >> ram_addr = qemu_ram_addr_from_host(addr); >> if (ram_addr != RAM_ADDR_INVALID && >> kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { >> + page_size = qemu_ram_pagesize_from_addr(ram_addr); >> + if (page_size > TARGET_PAGE_SIZE) { >> + ram_addr = ROUND_DOWN(ram_addr, page_size); >> + snprintf(lp_msg, sizeof(lp_msg), " on lost large page " >> + RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr); >> + } else { >> + lp_msg[0] = '\0'; >> + } >> kvm_hwpoison_page_add(ram_addr); >> /* >> * If this is a BUS_MCEERR_AR, we know we have been called >> @@ -2389,6 +2399,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) >> kvm_cpu_synchronize_state(c); >> if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) { >> kvm_inject_arm_sea(c); >> + error_report("Guest Memory Error at QEMU addr %p and " >> + "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected", >> + addr, paddr, lp_msg, "BUS_MCEERR_AR"); >> } else { >> error_report("failed to record the error"); >> abort(); >> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c >> index 2f66e63b88..7715cab7cf 100644 >> --- 
a/target/i386/kvm/kvm.c >> +++ b/target/i386/kvm/kvm.c >> @@ -741,6 +741,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int >> code, void *addr) >> CPUX86State *env = &cpu->env; >> ram_addr_t ram_addr; >> hwaddr paddr; >> + size_t page_size; >> + char lp_msg[54]; >> /* If we get an action required MCE, it has been injected by KVM >> * while the VM was running. An action optional MCE instead should >> @@ -753,6 +755,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) >> ram_addr = qemu_ram_addr_from_host(addr); >> if (ram_addr != RAM_ADDR_INVALID && >> kvm_physical_memory_addr_from_host(c->kvm_state, addr, >> &paddr)) { >> + page_size = qemu_ram_pagesize_from_addr(ram_addr); >> + if (page_size > TARGET_PAGE_SIZE) { >> + ram_addr = ROUND_DOWN(ram_addr, page_size); > > As raised, aligning ram_addr_t addresses to page_size is wrong. > > Maybe we really want to print block->idstr, offset, size like I proposed > at the other place, here as well? Yes, we can collect the information from the block associated to this ram_addr. But instead of duplicating the necessary code into both i386 and ARM, I came back to adding the change into the kvm_hwpoison_page_add() function called from both i386 and ARM specific code. I also needed a new possibility to retrieve the information while we are dealing with the SIGBUS signal, and created a new function to gather the information from the RAMBlock: qemu_ram_block_location_info_from_addr(ram_addr_t ram_addr, struct RAMBlockInfo *b_info) with the associated struct. So that we can use the RCU_READ_LOCK_GUARD() and retrieve all the data. Note about ARM failing on large pages: ----------=====---------------------- I could test that ARM VMs impacted by memory errors on a large underlying memory page, can end up looping on reporting the error: The VM encountering an error has a high probability to crash and can try to save a vmcore with a kdump phase. 
This fix introduces qemu messages reporting errors when they are relayed to the VM. A large page being poisoned by an error on ARM can make a VM loop on the vmcore collection phase and the console would show messages like that appearing every 10 seconds (before the change): vvv Starting Kdump Vmcore Save Service... [ 3.095399] kdump[445]: Kdump is using the default log level(3). [ 3.173998] kdump[481]: saving to /sysroot/var/crash/127.0.0.1-2025-01-27-20:17:40/ [ 3.189683] kdump[486]: saving vmcore-dmesg.txt to /sysroot/var/crash/127.0.0.1-2025-01-27-20:17:40/ [ 3.213584] kdump[492]: saving vmcore-dmesg.txt complete [ 3.220295] kdump[494]: saving vmcore [ 10.029515] EDAC MC0: 1 UE unknown on unknown memory ( page:0x116c60 offset:0x0 grain:1 - APEI location: ) [ 10.033647] [Firmware Warn]: GHES: Invalid address in generic error data: 0x116c60000 [ 10.036974] {2}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 0 [ 10.040514] {2}[Hardware Error]: event severity: recoverable [ 10.042911] {2}[Hardware Error]: Error 0, type: recoverable [ 10.045310] {2}[Hardware Error]: section_type: memory error [ 10.047666] {2}[Hardware Error]: physical_address: 0x0000000116c60000 [ 10.050486] {2}[Hardware Error]: error_type: 0, unknown [ 20.053205] EDAC MC0: 1 UE unknown on unknown memory ( page:0x116c60 offset:0x0 grain:1 - APEI location: ) [ 20.057416] [Firmware Warn]: GHES: Invalid address in generic error data: 0x116c60000 [ 20.060781] {3}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 0 [ 20.065472] {3}[Hardware Error]: event severity: recoverable [ 20.067878] {3}[Hardware Error]: Error 0, type: recoverable [ 20.070273] {3}[Hardware Error]: section_type: memory error [ 20.072686] {3}[Hardware Error]: physical_address: 0x0000000116c60000 [ 20.075590] {3}[Hardware Error]: error_type: 0, unknown ^^^ with the fix, we now have a flood of messages like: vvv qemu-system-aarch64: Memory Error on large page from ram-node1:d5e00000+0 
+200000 qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected qemu-system-aarch64: Memory Error on large page from ram-node1:d5e00000+0 +200000 qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected qemu-system-aarch64: Memory Error on large page from ram-node1:d5e00000+0 +200000 qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected ^^^ In both cases, this situation loops indefinitely ! I'm just informing of a change of behavior, fixing this issue would most probably require VM kernel modifications or a work-around in qemu when errors are reported too often, but is out of the scope of this current qemu fix.
> Yes, we can collect the information from the block associated to this > ram_addr. But instead of duplicating the necessary code into both i386 > and ARM, I came back to adding the change into the > kvm_hwpoison_page_add() function called from both i386 and ARM specific > code. > > I also needed a new possibility to retrieve the information while we are > dealing with the SIGBUS signal, and created a new function to gather the > information from the RAMBlock: > qemu_ram_block_location_info_from_addr(ram_addr_t ram_addr, > struct RAMBlockInfo *b_info) > with the associated struct. > > So that we can use the RCU_READ_LOCK_GUARD() and retrieve all the data. Makes sense. > > > Note about ARM failing on large pages: > ----------=====---------------------- > I could test that ARM VMs impacted by memory errors on a large > underlying memory page, can end up looping on reporting the error: > The VM encountering an error has a high probability to crash and can try > to save a vmcore with a kdump phase. Yeah, that's what I thought. If you rip out 1 GiB of memory, your VM is going to have a bad time :/ > > This fix introduces qemu messages reporting errors when they are relayed > to the VM. > A large page being poisoned by an error on ARM can make a VM loop on the > vmcore collection phase and the console would show messages like that > appearing every 10 seconds (before the change): > > vvv > Starting Kdump Vmcore Save Service... > [ 3.095399] kdump[445]: Kdump is using the default log level(3). 
> [ 3.173998] kdump[481]: saving to > /sysroot/var/crash/127.0.0.1-2025-01-27-20:17:40/ > [ 3.189683] kdump[486]: saving vmcore-dmesg.txt to > /sysroot/var/crash/127.0.0.1-2025-01-27-20:17:40/ > [ 3.213584] kdump[492]: saving vmcore-dmesg.txt complete > [ 3.220295] kdump[494]: saving vmcore > [ 10.029515] EDAC MC0: 1 UE unknown on unknown memory ( page:0x116c60 > offset:0x0 grain:1 - APEI location: ) > [ 10.033647] [Firmware Warn]: GHES: Invalid address in generic error > data: 0x116c60000 > [ 10.036974] {2}[Hardware Error]: Hardware error from APEI Generic > Hardware Error Source: 0 > [ 10.040514] {2}[Hardware Error]: event severity: recoverable > [ 10.042911] {2}[Hardware Error]: Error 0, type: recoverable > [ 10.045310] {2}[Hardware Error]: section_type: memory error > [ 10.047666] {2}[Hardware Error]: physical_address: 0x0000000116c60000 > [ 10.050486] {2}[Hardware Error]: error_type: 0, unknown > [ 20.053205] EDAC MC0: 1 UE unknown on unknown memory ( page:0x116c60 > offset:0x0 grain:1 - APEI location: ) > [ 20.057416] [Firmware Warn]: GHES: Invalid address in generic error > data: 0x116c60000 > [ 20.060781] {3}[Hardware Error]: Hardware error from APEI Generic > Hardware Error Source: 0 > [ 20.065472] {3}[Hardware Error]: event severity: recoverable > [ 20.067878] {3}[Hardware Error]: Error 0, type: recoverable > [ 20.070273] {3}[Hardware Error]: section_type: memory error > [ 20.072686] {3}[Hardware Error]: physical_address: 0x0000000116c60000 > [ 20.075590] {3}[Hardware Error]: error_type: 0, unknown > ^^^ > > with the fix, we now have a flood of messages like: > > vvv > qemu-system-aarch64: Memory Error on large page from > ram-node1:d5e00000+0 +200000 > qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and > GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected > qemu-system-aarch64: Memory Error on large page from > ram-node1:d5e00000+0 +200000 > qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and > GUEST addr 
0x115e79000 of type BUS_MCEERR_AR injected > qemu-system-aarch64: Memory Error on large page from > ram-node1:d5e00000+0 +200000 > qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and > GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected > ^^^ > > > In both cases, this situation loops indefinitely ! > > I'm just informing of a change of behavior, fixing this issue would most > probably require VM kernel modifications or a work-around in qemu when > errors are reported too often, but is out of the scope of this current > qemu fix. Agreed. I think one problem is that kdump cannot really cope with new memory errors (it tries to not touch pages that had a memory error in the old kernel). Maybe this is also due to the fact that we inform the kernel only about a single page vanishing, whereby actually a whole 1 GiB is vanishing. -- Cheers, David / dhildenb
© 2016 - 2025 Red Hat, Inc.