From: William Roche <william.roche@oracle.com>
In case of a large page impacted by a memory error, enhance
the existing Qemu error message which indicates that the error
is injected in the VM, adding "on lost large page SIZE@ADDR".
Include also a similar message to the ARM platform.
In the case of a large page impacted, we now report:
...Memory Error at QEMU addr X and GUEST addr Y on lost large page SIZE@ADDR of type...
Signed-off-by: William Roche <william.roche@oracle.com>
---
accel/kvm/kvm-all.c | 4 ----
target/arm/kvm.c | 13 +++++++++++++
target/i386/kvm/kvm.c | 18 ++++++++++++++----
3 files changed, 27 insertions(+), 8 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 4f2abd5774..f89568bfa3 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -1296,10 +1296,6 @@ static void kvm_unpoison_all(void *param)
void kvm_hwpoison_page_add(ram_addr_t ram_addr)
{
HWPoisonPage *page;
- size_t page_size = qemu_ram_pagesize_from_addr(ram_addr);
-
- if (page_size > TARGET_PAGE_SIZE)
- ram_addr = QEMU_ALIGN_DOWN(ram_addr, page_size);
QLIST_FOREACH(page, &hwpoison_page_list, list) {
if (page->ram_addr == ram_addr) {
diff --git a/target/arm/kvm.c b/target/arm/kvm.c
index a9444a2c7a..323ce0045d 100644
--- a/target/arm/kvm.c
+++ b/target/arm/kvm.c
@@ -2366,6 +2366,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
{
ram_addr_t ram_addr;
hwaddr paddr;
+ size_t page_size;
+ char lp_msg[54];
assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
@@ -2373,6 +2375,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr != RAM_ADDR_INVALID &&
kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+ page_size = qemu_ram_pagesize_from_addr(ram_addr);
+ if (page_size > TARGET_PAGE_SIZE) {
+ ram_addr = ROUND_DOWN(ram_addr, page_size);
+ snprintf(lp_msg, sizeof(lp_msg), " on lost large page "
+ RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr);
+ } else {
+ lp_msg[0] = '\0';
+ }
kvm_hwpoison_page_add(ram_addr);
/*
* If this is a BUS_MCEERR_AR, we know we have been called
@@ -2389,6 +2399,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
kvm_cpu_synchronize_state(c);
if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
kvm_inject_arm_sea(c);
+ error_report("Guest Memory Error at QEMU addr %p and "
+ "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+ addr, paddr, lp_msg, "BUS_MCEERR_AR");
} else {
error_report("failed to record the error");
abort();
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 2f66e63b88..7715cab7cf 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -741,6 +741,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
CPUX86State *env = &cpu->env;
ram_addr_t ram_addr;
hwaddr paddr;
+ size_t page_size;
+ char lp_msg[54];
/* If we get an action required MCE, it has been injected by KVM
* while the VM was running. An action optional MCE instead should
@@ -753,6 +755,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
ram_addr = qemu_ram_addr_from_host(addr);
if (ram_addr != RAM_ADDR_INVALID &&
kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
+ page_size = qemu_ram_pagesize_from_addr(ram_addr);
+ if (page_size > TARGET_PAGE_SIZE) {
+ ram_addr = ROUND_DOWN(ram_addr, page_size);
+ snprintf(lp_msg, sizeof(lp_msg), " on lost large page "
+ RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr);
+ } else {
+ lp_msg[0] = '\0';
+ }
kvm_hwpoison_page_add(ram_addr);
kvm_mce_inject(cpu, paddr, code);
@@ -763,12 +773,12 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
*/
if (code == BUS_MCEERR_AR) {
error_report("Guest MCE Memory Error at QEMU addr %p and "
- "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
- addr, paddr, "BUS_MCEERR_AR");
+ "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+ addr, paddr, lp_msg, "BUS_MCEERR_AR");
} else {
warn_report("Guest MCE Memory Error at QEMU addr %p and "
- "GUEST addr 0x%" HWADDR_PRIx " of type %s injected",
- addr, paddr, "BUS_MCEERR_AO");
+ "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
+ addr, paddr, lp_msg, "BUS_MCEERR_AO");
}
return;
--
2.43.5
On 10.01.25 22:14, William Roche wrote:
> From: William Roche <william.roche@oracle.com>
>
> In case of a large page impacted by a memory error, enhance
> the existing Qemu error message which indicates that the error
> is injected in the VM, adding "on lost large page SIZE@ADDR".
>
> Include also a similar message to the ARM platform.
>
> In the case of a large page impacted, we now report:
> ...Memory Error at QEMU addr X and GUEST addr Y on lost large page SIZE@ADDR of type...
>
> Signed-off-by: William Roche <william.roche@oracle.com>
> ---
> accel/kvm/kvm-all.c | 4 ----
> target/arm/kvm.c | 13 +++++++++++++
> target/i386/kvm/kvm.c | 18 ++++++++++++++----
> 3 files changed, 27 insertions(+), 8 deletions(-)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 4f2abd5774..f89568bfa3 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -1296,10 +1296,6 @@ static void kvm_unpoison_all(void *param)
> void kvm_hwpoison_page_add(ram_addr_t ram_addr)
> {
> HWPoisonPage *page;
> - size_t page_size = qemu_ram_pagesize_from_addr(ram_addr);
> -
> - if (page_size > TARGET_PAGE_SIZE)
> - ram_addr = QEMU_ALIGN_DOWN(ram_addr, page_size);
>
> QLIST_FOREACH(page, &hwpoison_page_list, list) {
> if (page->ram_addr == ram_addr) {
> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
> index a9444a2c7a..323ce0045d 100644
> --- a/target/arm/kvm.c
> +++ b/target/arm/kvm.c
> @@ -2366,6 +2366,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> {
> ram_addr_t ram_addr;
> hwaddr paddr;
> + size_t page_size;
> + char lp_msg[54];
>
> assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
>
> @@ -2373,6 +2375,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> ram_addr = qemu_ram_addr_from_host(addr);
> if (ram_addr != RAM_ADDR_INVALID &&
> kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
> + page_size = qemu_ram_pagesize_from_addr(ram_addr);
> + if (page_size > TARGET_PAGE_SIZE) {
> + ram_addr = ROUND_DOWN(ram_addr, page_size);
> + snprintf(lp_msg, sizeof(lp_msg), " on lost large page "
> + RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr);
> + } else {
> + lp_msg[0] = '\0';
> + }
> kvm_hwpoison_page_add(ram_addr);
> /*
> * If this is a BUS_MCEERR_AR, we know we have been called
> @@ -2389,6 +2399,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> kvm_cpu_synchronize_state(c);
> if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
> kvm_inject_arm_sea(c);
> + error_report("Guest Memory Error at QEMU addr %p and "
> + "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
> + addr, paddr, lp_msg, "BUS_MCEERR_AR");
> } else {
> error_report("failed to record the error");
> abort();
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 2f66e63b88..7715cab7cf 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -741,6 +741,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> CPUX86State *env = &cpu->env;
> ram_addr_t ram_addr;
> hwaddr paddr;
> + size_t page_size;
> + char lp_msg[54];
>
> /* If we get an action required MCE, it has been injected by KVM
> * while the VM was running. An action optional MCE instead should
> @@ -753,6 +755,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
> ram_addr = qemu_ram_addr_from_host(addr);
> if (ram_addr != RAM_ADDR_INVALID &&
> kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
> + page_size = qemu_ram_pagesize_from_addr(ram_addr);
> + if (page_size > TARGET_PAGE_SIZE) {
> + ram_addr = ROUND_DOWN(ram_addr, page_size);
As raised, aligning ram_addr_t addresses to page_size is wrong.
Maybe we really want to print block->idstr, offset, size like I proposed
at the other place, here as well?
--
Cheers,
David / dhildenb
On 1/14/25 15:09, David Hildenbrand wrote:
> On 10.01.25 22:14, William Roche wrote:
>> From: William Roche <william.roche@oracle.com>
>>
>> In case of a large page impacted by a memory error, enhance
>> the existing Qemu error message which indicates that the error
>> is injected in the VM, adding "on lost large page SIZE@ADDR".
>>
>> Include also a similar message to the ARM platform.
>>
>> [...]
>> diff --git a/target/arm/kvm.c b/target/arm/kvm.c
>> index a9444a2c7a..323ce0045d 100644
>> --- a/target/arm/kvm.c
>> +++ b/target/arm/kvm.c
>> @@ -2366,6 +2366,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>> {
>> ram_addr_t ram_addr;
>> hwaddr paddr;
>> + size_t page_size;
>> + char lp_msg[54];
>> assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO);
>> @@ -2373,6 +2375,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>> ram_addr = qemu_ram_addr_from_host(addr);
>> if (ram_addr != RAM_ADDR_INVALID &&
>> kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) {
>> + page_size = qemu_ram_pagesize_from_addr(ram_addr);
>> + if (page_size > TARGET_PAGE_SIZE) {
>> + ram_addr = ROUND_DOWN(ram_addr, page_size);
>> + snprintf(lp_msg, sizeof(lp_msg), " on lost large page "
>> + RAM_ADDR_FMT "@" RAM_ADDR_FMT "", page_size, ram_addr);
>> + } else {
>> + lp_msg[0] = '\0';
>> + }
>> kvm_hwpoison_page_add(ram_addr);
>> /*
>> * If this is a BUS_MCEERR_AR, we know we have been called
>> @@ -2389,6 +2399,9 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>> kvm_cpu_synchronize_state(c);
>> if (!acpi_ghes_record_errors(ACPI_HEST_SRC_ID_SEA, paddr)) {
>> kvm_inject_arm_sea(c);
>> + error_report("Guest Memory Error at QEMU addr %p and "
>> + "GUEST addr 0x%" HWADDR_PRIx "%s of type %s injected",
>> + addr, paddr, lp_msg, "BUS_MCEERR_AR");
>> } else {
>> error_report("failed to record the error");
>> abort();
>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>> index 2f66e63b88..7715cab7cf 100644
>> --- a/target/i386/kvm/kvm.c
>> +++ b/target/i386/kvm/kvm.c
>> @@ -741,6 +741,8 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int
>> code, void *addr)
>> CPUX86State *env = &cpu->env;
>> ram_addr_t ram_addr;
>> hwaddr paddr;
>> + size_t page_size;
>> + char lp_msg[54];
>> /* If we get an action required MCE, it has been injected by KVM
>> * while the VM was running. An action optional MCE instead should
>> @@ -753,6 +755,14 @@ void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr)
>> ram_addr = qemu_ram_addr_from_host(addr);
>> if (ram_addr != RAM_ADDR_INVALID &&
>> kvm_physical_memory_addr_from_host(c->kvm_state, addr,
>> &paddr)) {
>> + page_size = qemu_ram_pagesize_from_addr(ram_addr);
>> + if (page_size > TARGET_PAGE_SIZE) {
>> + ram_addr = ROUND_DOWN(ram_addr, page_size);
>
> As raised, aligning ram_addr_t addresses to page_size is wrong.
>
> Maybe we really want to print block->idstr, offset, size like I proposed
> at the other place, here as well?
Yes, we can collect the information from the block associated to this
ram_addr. But instead of duplicating the necessary code into both i386
and ARM, I came back to adding the change into the
kvm_hwpoison_page_add() function called from both i386 and ARM specific
code.
I also needed a new possibility to retrieve the information while we are
dealing with the SIGBUS signal, and created a new function to gather the
information from the RAMBlock:
qemu_ram_block_location_info_from_addr(ram_addr_t ram_addr,
struct RAMBlockInfo *b_info)
with the associated struct.
So that we can use the RCU_READ_LOCK_GUARD() and retrieve all the data.
Note about ARM failing on large pages:
--------------------------------------
I could test that ARM VMs impacted by memory errors on a large
underlying memory page, can end up looping on reporting the error:
The VM encountering an error has a high probability to crash and can try
to save a vmcore with a kdump phase.
This fix introduces qemu messages reporting errors when they are relayed
to the VM.
A large page being poisoned by an error on ARM can make a VM loop on the
vmcore collection phase and the console would show messages like that
appearing every 10 seconds (before the change):
vvv
Starting Kdump Vmcore Save Service...
[ 3.095399] kdump[445]: Kdump is using the default log level(3).
[ 3.173998] kdump[481]: saving to
/sysroot/var/crash/127.0.0.1-2025-01-27-20:17:40/
[ 3.189683] kdump[486]: saving vmcore-dmesg.txt to
/sysroot/var/crash/127.0.0.1-2025-01-27-20:17:40/
[ 3.213584] kdump[492]: saving vmcore-dmesg.txt complete
[ 3.220295] kdump[494]: saving vmcore
[ 10.029515] EDAC MC0: 1 UE unknown on unknown memory ( page:0x116c60
offset:0x0 grain:1 - APEI location: )
[ 10.033647] [Firmware Warn]: GHES: Invalid address in generic error
data: 0x116c60000
[ 10.036974] {2}[Hardware Error]: Hardware error from APEI Generic
Hardware Error Source: 0
[ 10.040514] {2}[Hardware Error]: event severity: recoverable
[ 10.042911] {2}[Hardware Error]: Error 0, type: recoverable
[ 10.045310] {2}[Hardware Error]: section_type: memory error
[ 10.047666] {2}[Hardware Error]: physical_address: 0x0000000116c60000
[ 10.050486] {2}[Hardware Error]: error_type: 0, unknown
[ 20.053205] EDAC MC0: 1 UE unknown on unknown memory ( page:0x116c60
offset:0x0 grain:1 - APEI location: )
[ 20.057416] [Firmware Warn]: GHES: Invalid address in generic error
data: 0x116c60000
[ 20.060781] {3}[Hardware Error]: Hardware error from APEI Generic
Hardware Error Source: 0
[ 20.065472] {3}[Hardware Error]: event severity: recoverable
[ 20.067878] {3}[Hardware Error]: Error 0, type: recoverable
[ 20.070273] {3}[Hardware Error]: section_type: memory error
[ 20.072686] {3}[Hardware Error]: physical_address: 0x0000000116c60000
[ 20.075590] {3}[Hardware Error]: error_type: 0, unknown
^^^
with the fix, we now have a flood of messages like:
vvv
qemu-system-aarch64: Memory Error on large page from
ram-node1:d5e00000+0 +200000
qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and
GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected
qemu-system-aarch64: Memory Error on large page from
ram-node1:d5e00000+0 +200000
qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and
GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected
qemu-system-aarch64: Memory Error on large page from
ram-node1:d5e00000+0 +200000
qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and
GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected
^^^
In both cases, this situation loops indefinitely !
I'm just informing of a change of behavior, fixing this issue would most
probably require VM kernel modifications or a work-around in qemu when
errors are reported too often, but is out of the scope of this current
qemu fix.
> Yes, we can collect the information from the block associated to this
> ram_addr. But instead of duplicating the necessary code into both i386
> and ARM, I came back to adding the change into the
> kvm_hwpoison_page_add() function called from both i386 and ARM specific
> code.
>
> I also needed a new possibility to retrieve the information while we are
> dealing with the SIGBUS signal, and created a new function to gather the
> information from the RAMBlock:
> qemu_ram_block_location_info_from_addr(ram_addr_t ram_addr,
> struct RAMBlockInfo *b_info)
> with the associated struct.
>
> So that we can use the RCU_READ_LOCK_GUARD() and retrieve all the data.
Makes sense.
>
>
> Note about ARM failing on large pages:
> --------------------------------------
> I could test that ARM VMs impacted by memory errors on a large
> underlying memory page, can end up looping on reporting the error:
> The VM encountering an error has a high probability to crash and can try
> to save a vmcore with a kdump phase.
Yeah, that's what I thought. If you rip out 1 GiB of memory, your VM is
going to have a bad time :/
>
> This fix introduces qemu messages reporting errors when they are relayed
> to the VM.
> A large page being poisoned by an error on ARM can make a VM loop on the
> vmcore collection phase and the console would show messages like that
> appearing every 10 seconds (before the change):
>
> vvv
> Starting Kdump Vmcore Save Service...
> [ 3.095399] kdump[445]: Kdump is using the default log level(3).
> [ 3.173998] kdump[481]: saving to
> /sysroot/var/crash/127.0.0.1-2025-01-27-20:17:40/
> [ 3.189683] kdump[486]: saving vmcore-dmesg.txt to
> /sysroot/var/crash/127.0.0.1-2025-01-27-20:17:40/
> [ 3.213584] kdump[492]: saving vmcore-dmesg.txt complete
> [ 3.220295] kdump[494]: saving vmcore
> [ 10.029515] EDAC MC0: 1 UE unknown on unknown memory ( page:0x116c60
> offset:0x0 grain:1 - APEI location: )
> [ 10.033647] [Firmware Warn]: GHES: Invalid address in generic error
> data: 0x116c60000
> [ 10.036974] {2}[Hardware Error]: Hardware error from APEI Generic
> Hardware Error Source: 0
> [ 10.040514] {2}[Hardware Error]: event severity: recoverable
> [ 10.042911] {2}[Hardware Error]: Error 0, type: recoverable
> [ 10.045310] {2}[Hardware Error]: section_type: memory error
> [ 10.047666] {2}[Hardware Error]: physical_address: 0x0000000116c60000
> [ 10.050486] {2}[Hardware Error]: error_type: 0, unknown
> [ 20.053205] EDAC MC0: 1 UE unknown on unknown memory ( page:0x116c60
> offset:0x0 grain:1 - APEI location: )
> [ 20.057416] [Firmware Warn]: GHES: Invalid address in generic error
> data: 0x116c60000
> [ 20.060781] {3}[Hardware Error]: Hardware error from APEI Generic
> Hardware Error Source: 0
> [ 20.065472] {3}[Hardware Error]: event severity: recoverable
> [ 20.067878] {3}[Hardware Error]: Error 0, type: recoverable
> [ 20.070273] {3}[Hardware Error]: section_type: memory error
> [ 20.072686] {3}[Hardware Error]: physical_address: 0x0000000116c60000
> [ 20.075590] {3}[Hardware Error]: error_type: 0, unknown
> ^^^
>
> with the fix, we now have a flood of messages like:
>
> vvv
> qemu-system-aarch64: Memory Error on large page from
> ram-node1:d5e00000+0 +200000
> qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and
> GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected
> qemu-system-aarch64: Memory Error on large page from
> ram-node1:d5e00000+0 +200000
> qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and
> GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected
> qemu-system-aarch64: Memory Error on large page from
> ram-node1:d5e00000+0 +200000
> qemu-system-aarch64: Guest Memory Error at QEMU addr 0xffff35c79000 and
> GUEST addr 0x115e79000 of type BUS_MCEERR_AR injected
> ^^^
>
>
> In both cases, this situation loops indefinitely !
>
> I'm just informing of a change of behavior, fixing this issue would most
> probably require VM kernel modifications or a work-around in qemu when
> errors are reported too often, but is out of the scope of this current
> qemu fix.
Agreed. I think one problem is that kdump cannot really cope with new
memory errors (it tries to not touch pages that had a memory error in
the old kernel).
Maybe this is also due to the fact that we inform the kernel only about
a single page vanishing, whereby actually a whole 1 GiB is vanishing.
--
Cheers,
David / dhildenb
© 2016 - 2026 Red Hat, Inc.