Try to be tolerant of errors if the machine check had been recovered
by the host.
Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
---
hw/ppc/spapr_events.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index d35151eeb0..3f524cb0ca 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
/* get rtas addr from fdt */
rtas_addr = spapr_get_rtas_addr();
if (!rtas_addr) {
- warn_report("FWNMI: Unable to deliver machine check to guest: "
- "rtas_addr not found.");
- qemu_system_guest_panicked(NULL);
+ if (!recovered) {
+ warn_report("FWNMI: Unable to deliver machine check to guest: "
+ "rtas_addr not found.");
+ qemu_system_guest_panicked(NULL);
+ } else {
+ warn_report("FWNMI: Unable to deliver machine check to guest: "
+ "rtas_addr not found. Machine check recovered.");
+ }
g_free(ext_elog);
return;
}
+ spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
+
stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
env->gpr[3]);
cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
@@ -850,9 +857,14 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
* that CPU called "ibm,nmi-interlock")
*/
if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
- warn_report("FWNMI: Unable to deliver machine check to guest: "
- "nested machine check.");
- qemu_system_guest_panicked(NULL);
+ if (!recovered) {
+ warn_report("FWNMI: Unable to deliver machine check to guest: "
+ "nested machine check.");
+ qemu_system_guest_panicked(NULL);
+ } else {
+ warn_report("FWNMI: Unable to deliver machine check to guest: "
+ "nested machine check. Machine check recovered.");
+ }
return;
}
qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
@@ -880,7 +892,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
warn_report("Received a fwnmi while migration was in progress");
}
- spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
spapr_mce_dispatch_elog(cpu, recovered);
}
--
2.23.0
On Tue, 17 Mar 2020 15:02:14 +1000
Nicholas Piggin <npiggin@gmail.com> wrote:
> Try to be tolerant of errors if the machine check had been recovered
> by the host.
>
> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
> ---
Same comment as previous patch on multi-line error strings and
warn_report() in the !recovered case.
> hw/ppc/spapr_events.c | 25 ++++++++++++++++++-------
> 1 file changed, 18 insertions(+), 7 deletions(-)
>
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index d35151eeb0..3f524cb0ca 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
> /* get rtas addr from fdt */
> rtas_addr = spapr_get_rtas_addr();
> if (!rtas_addr) {
> - warn_report("FWNMI: Unable to deliver machine check to guest: "
> - "rtas_addr not found.");
> - qemu_system_guest_panicked(NULL);
> + if (!recovered) {
> + warn_report("FWNMI: Unable to deliver machine check to guest: "
> + "rtas_addr not found.");
> + qemu_system_guest_panicked(NULL);
> + } else {
> + warn_report("FWNMI: Unable to deliver machine check to guest: "
> + "rtas_addr not found. Machine check recovered.");
> + }
> g_free(ext_elog);
> return;
> }
>
> + spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
> +
I don't understand this change.
> stq_be_phys(&address_space_memory, rtas_addr + RTAS_ERROR_LOG_OFFSET,
> env->gpr[3]);
> cpu_physical_memory_write(rtas_addr + RTAS_ERROR_LOG_OFFSET +
> @@ -850,9 +857,14 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> * that CPU called "ibm,nmi-interlock")
> */
> if (spapr->fwnmi_machine_check_interlock == cpu->vcpu_id) {
> - warn_report("FWNMI: Unable to deliver machine check to guest: "
> - "nested machine check.");
> - qemu_system_guest_panicked(NULL);
> + if (!recovered) {
> + warn_report("FWNMI: Unable to deliver machine check to guest: "
> + "nested machine check.");
> + qemu_system_guest_panicked(NULL);
> + } else {
> + warn_report("FWNMI: Unable to deliver machine check to guest: "
> + "nested machine check. Machine check recovered.");
> + }
> return;
> }
> qemu_cond_wait_iothread(&spapr->fwnmi_machine_check_interlock_cond);
> @@ -880,7 +892,6 @@ void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> warn_report("Received a fwnmi while migration was in progress");
> }
>
> - spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
> spapr_mce_dispatch_elog(cpu, recovered);
> }
>
Greg Kurz wrote on March 18, 2020 2:57 am:
> On Tue, 17 Mar 2020 15:02:14 +1000
> Nicholas Piggin <npiggin@gmail.com> wrote:
>
>> Try to be tolerant of errors if the machine check had been recovered
>> by the host.
>>
>> Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
>> ---
>
> Same comment as previous patch on multi-line error strings and
> warn_report() in the !recovered case.
>
>> hw/ppc/spapr_events.c | 25 ++++++++++++++++++-------
>> 1 file changed, 18 insertions(+), 7 deletions(-)
>>
>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>> index d35151eeb0..3f524cb0ca 100644
>> --- a/hw/ppc/spapr_events.c
>> +++ b/hw/ppc/spapr_events.c
>> @@ -807,13 +807,20 @@ static void spapr_mce_dispatch_elog(PowerPCCPU *cpu, bool recovered)
>> /* get rtas addr from fdt */
>> rtas_addr = spapr_get_rtas_addr();
>> if (!rtas_addr) {
>> - warn_report("FWNMI: Unable to deliver machine check to guest: "
>> - "rtas_addr not found.");
>> - qemu_system_guest_panicked(NULL);
>> + if (!recovered) {
>> + warn_report("FWNMI: Unable to deliver machine check to guest: "
>> + "rtas_addr not found.");
>> + qemu_system_guest_panicked(NULL);
>> + } else {
>> + warn_report("FWNMI: Unable to deliver machine check to guest: "
>> + "rtas_addr not found. Machine check recovered.");
>> + }
>> g_free(ext_elog);
>> return;
>> }
>>
>> + spapr->fwnmi_machine_check_interlock = cpu->vcpu_id;
>> +
>
> I don't understand this change.
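If we bail out without delivering the interrupt, we must not take the
interlock; otherwise the guest can never release it. That is why the
assignment moves out of spapr_mce_req_event() and into
spapr_mce_dispatch_elog(), after the rtas_addr check has passed.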
Thanks,
Nick
© 2016 - 2026 Red Hat, Inc.