Block VM migration requests until the machine check
error handling is complete as (i) these errors are
specific to the source hardware and is irrelevant on
the target hardware, (ii) these errors cause data
corruption and should be handled before migration.
Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
---
hw/ppc/spapr_rtas.c | 3 +++
include/hw/ppc/spapr.h | 2 ++
target/ppc/kvm.c | 17 +++++++++++++++++
3 files changed, 22 insertions(+)
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index d017a67..17f6567 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -47,6 +47,7 @@
#include "trace.h"
#include "hw/ppc/fdt.h"
#include "kvm_ppc.h"
+#include "migration/blocker.h"
static void rtas_display_character(PowerPCCPU *cpu, sPAPRMachineState *spapr,
uint32_t token, uint32_t nargs,
@@ -390,6 +391,8 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
spapr->mc_status = -1;
qemu_cond_signal(&spapr->mc_delivery_cond);
rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+ migrate_del_blocker(spapr->migration_blocker);
+ error_free(spapr->migration_blocker);
}
}
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index a75e9cf..0890a44 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -7,6 +7,7 @@
#include "hw/ppc/spapr_drc.h"
#include "hw/mem/pc-dimm.h"
#include "hw/ppc/spapr_ovec.h"
+#include "qapi/error.h"
struct VIOsPAPRBus;
struct sPAPRPHBState;
@@ -136,6 +137,7 @@ struct sPAPRMachineState {
MemoryHotplugState hotplug_memory;
const char *icp_type;
+ Error *migration_blocker;
};
#define H_SUCCESS 0
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 59b3322..58de7ea 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -52,6 +52,7 @@
#endif
#include "elf.h"
#include "sysemu/kvm_int.h"
+#include "migration/blocker.h"
//#define DEBUG_KVM
@@ -2770,10 +2771,26 @@ int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
target_ulong msr = 0;
+ Error *local_err = NULL;
+ int ret;
bool type, le;
cpu_synchronize_state(CPU(cpu));
+ error_setg(&spapr->migration_blocker,
+ "Live migration not supported during machine check error handling");
+ ret = migrate_add_blocker(spapr->migration_blocker, &local_err);
+ if (ret < 0) {
+ /*
+ * We don't want to abort and let the migration to continue. In a
+ * rare case, the machine check handler will run on the target
+ * hardware. Though this is not preferable, it is better than aborting
+ * the migration or killing the VM.
+ */
+ error_free(spapr->migration_blocker);
+ fprintf(stderr, "Warning: Machine check during VM migration\n");
+ }
+
/*
* Properly set bits in MSR before we invoke the handler.
* SRR0/1, DAR and DSISR are properly set by KVM
On Thu, Sep 28, 2017 at 04:08:31PM +0530, Aravinda Prasad wrote:
> Block VM migration requests until the machine check
> error handling is complete as (i) these errors are
> specific to the source hardware and is irrelevant on
> the target hardware, (ii) these errors cause data
> corruption and should be handled before migration.
>
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
> hw/ppc/spapr_rtas.c | 3 +++
> include/hw/ppc/spapr.h | 2 ++
> target/ppc/kvm.c | 17 +++++++++++++++++
> 3 files changed, 22 insertions(+)
>
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index d017a67..17f6567 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -47,6 +47,7 @@
> #include "trace.h"
> #include "hw/ppc/fdt.h"
> #include "kvm_ppc.h"
> +#include "migration/blocker.h"
>
> static void rtas_display_character(PowerPCCPU *cpu, sPAPRMachineState *spapr,
> uint32_t token, uint32_t nargs,
> @@ -390,6 +391,8 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> spapr->mc_status = -1;
> qemu_cond_signal(&spapr->mc_delivery_cond);
> rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> + migrate_del_blocker(spapr->migration_blocker);
> + error_free(spapr->migration_blocker);
> }
> }
>
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index a75e9cf..0890a44 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -7,6 +7,7 @@
> #include "hw/ppc/spapr_drc.h"
> #include "hw/mem/pc-dimm.h"
> #include "hw/ppc/spapr_ovec.h"
> +#include "qapi/error.h"
>
> struct VIOsPAPRBus;
> struct sPAPRPHBState;
> @@ -136,6 +137,7 @@ struct sPAPRMachineState {
> MemoryHotplugState hotplug_memory;
>
> const char *icp_type;
> + Error *migration_blocker;
This isn't a good name, because it's _specifically_ the fwnmi as a
migration blocker - trying to put another migration blocker in here
would break horribly, because nmi-interlock would clear it regardless.
> };
>
> #define H_SUCCESS 0
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 59b3322..58de7ea 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -52,6 +52,7 @@
> #endif
> #include "elf.h"
> #include "sysemu/kvm_int.h"
> +#include "migration/blocker.h"
>
> //#define DEBUG_KVM
>
> @@ -2770,10 +2771,26 @@ int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
> sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
> target_ulong msr = 0;
> + Error *local_err = NULL;
> + int ret;
> bool type, le;
>
> cpu_synchronize_state(CPU(cpu));
>
> + error_setg(&spapr->migration_blocker,
> + "Live migration not supported during machine check error handling");
In fact, there's no real reason to generate the error here. The
error's always the same so you could just create it at startup as a
global and just add/remove it to the block list.
> + ret = migrate_add_blocker(spapr->migration_blocker, &local_err);
> + if (ret < 0) {
> + /*
> + * We don't want to abort and let the migration to continue. In a
> + * rare case, the machine check handler will run on the target
> + * hardware. Though this is not preferable, it is better than aborting
Why is it not preferable? I mean it's an edge case, but AFAICT it's
still the correct behaviour.
> + * the migration or killing the VM.
> + */
> + error_free(spapr->migration_blocker);
> + fprintf(stderr, "Warning: Machine check during VM migration\n");
Use error_report(), not fprintf().
> + }
> +
> /*
> * Properly set bits in MSR before we invoke the handler.
> * SRR0/1, DAR and DSISR are properly set by KVM
>
--
David Gibson | I'll have my music baroque, and my code
david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson
On Wednesday 04 October 2017 07:09 AM, David Gibson wrote:
> On Thu, Sep 28, 2017 at 04:08:31PM +0530, Aravinda Prasad wrote:
>> Block VM migration requests until the machine check
>> error handling is complete as (i) these errors are
>> specific to the source hardware and is irrelevant on
>> the target hardware, (ii) these errors cause data
>> corruption and should be handled before migration.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>> hw/ppc/spapr_rtas.c | 3 +++
>> include/hw/ppc/spapr.h | 2 ++
>> target/ppc/kvm.c | 17 +++++++++++++++++
>> 3 files changed, 22 insertions(+)
>>
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index d017a67..17f6567 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -47,6 +47,7 @@
>> #include "trace.h"
>> #include "hw/ppc/fdt.h"
>> #include "kvm_ppc.h"
>> +#include "migration/blocker.h"
>>
>> static void rtas_display_character(PowerPCCPU *cpu, sPAPRMachineState *spapr,
>> uint32_t token, uint32_t nargs,
>> @@ -390,6 +391,8 @@ static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>> spapr->mc_status = -1;
>> qemu_cond_signal(&spapr->mc_delivery_cond);
>> rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> + migrate_del_blocker(spapr->migration_blocker);
>> + error_free(spapr->migration_blocker);
>> }
>> }
>>
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index a75e9cf..0890a44 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -7,6 +7,7 @@
>> #include "hw/ppc/spapr_drc.h"
>> #include "hw/mem/pc-dimm.h"
>> #include "hw/ppc/spapr_ovec.h"
>> +#include "qapi/error.h"
>>
>> struct VIOsPAPRBus;
>> struct sPAPRPHBState;
>> @@ -136,6 +137,7 @@ struct sPAPRMachineState {
>> MemoryHotplugState hotplug_memory;
>>
>> const char *icp_type;
>> + Error *migration_blocker;
>
> This isn't a good name, because it's _specifically_ the fwnmi as a
> migration blocker - trying to put another migration blocker in here
> would break horribly, because nmi-interlock would clear it regardless.
Will rename it.
>
>> };
>>
>> #define H_SUCCESS 0
>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>> index 59b3322..58de7ea 100644
>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -52,6 +52,7 @@
>> #endif
>> #include "elf.h"
>> #include "sysemu/kvm_int.h"
>> +#include "migration/blocker.h"
>>
>> //#define DEBUG_KVM
>>
>> @@ -2770,10 +2771,26 @@ int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
>> sPAPRMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
>> target_ulong msr = 0;
>> + Error *local_err = NULL;
>> + int ret;
>> bool type, le;
>>
>> cpu_synchronize_state(CPU(cpu));
>>
>> + error_setg(&spapr->migration_blocker,
>> + "Live migration not supported during machine check error handling");
>
> In fact, there's no real reason to generate the error here. The
> error's always the same so you could just create it at startup as a
> global and just add/remove it to the block list.
ok.
>
>> + ret = migrate_add_blocker(spapr->migration_blocker, &local_err);
>> + if (ret < 0) {
>> + /*
>> + * We don't want to abort and let the migration to continue. In a
>> + * rare case, the machine check handler will run on the target
>> + * hardware. Though this is not preferable, it is better than aborting
>
> Why is it not preferable? I mean it's an edge case, but AFAICT it's
> still the correct behaviour.
Will remove that line in the comment.
>
>> + * the migration or killing the VM.
>> + */
>> + error_free(spapr->migration_blocker);
>> + fprintf(stderr, "Warning: Machine check during VM migration\n");
>
> Use error_report(), not fprintf().
ok.
Regards,
Aravinda
>
>> + }
>> +
>> /*
>> * Properly set bits in MSR before we invoke the handler.
>> * SRR0/1, DAR and DSISR are properly set by KVM
>>
>
--
Regards,
Aravinda
© 2016 - 2025 Red Hat, Inc.