[v10] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

[Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by Aravinda Prasad 6 years, 8 months ago

Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
KVM causes a guest exit with NMI as the exit reason
when it encounters a machine check exception on an
address belonging to a guest. Without this capability
enabled, KVM redirects machine check exceptions to
the guest's 0x200 vector.

This patch also introduces the fwnmi-mce capability to
deal with the case where an attempt is made to migrate
a guest that has the KVM_CAP_PPC_FWNMI capability
enabled to a host that does not support this
capability.

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         |    1 +
 hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |    4 +++-
 target/ppc/kvm.c       |   19 +++++++++++++++++++
 target/ppc/kvm_ppc.h   |   12 ++++++++++++
 5 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 6dd8aaa..2ef86aa 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
     spapr_caps_add_properties(smc, &error_abort);
     smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
index 31b4661..2e92eb6 100644
--- a/hw/ppc/spapr_caps.c
+++ b/hw/ppc/spapr_caps.c
@@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
     }
 }
 
+static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
+                                Error **errp)
+{
+    if (!val) {
+        return; /* Disabled by default */
+    }
+
+    if (tcg_enabled()) {
+        error_setg(errp,
+"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
+    } else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
+        error_setg(errp,
+"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try cap-fwnmi-mce=off");
+    }
+}
+
 SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
     [SPAPR_CAP_HTM] = {
         .name = "htm",
@@ -578,6 +594,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
         .type = "bool",
         .apply = cap_ccf_assist_apply,
     },
+    [SPAPR_CAP_FWNMI_MCE] = {
+        .name = "fwnmi-mce",
+        .description = "Handle fwnmi machine check exceptions",
+        .index = SPAPR_CAP_FWNMI_MCE,
+        .get = spapr_cap_get_bool,
+        .set = spapr_cap_set_bool,
+        .type = "bool",
+        .apply = cap_fwnmi_mce_apply,
+    },
 };
 
 static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
@@ -717,6 +742,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
 SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
 SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
 SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
+SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
 
 void spapr_caps_init(SpaprMachineState *spapr)
 {
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 4f5becf..f891f8f 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -78,8 +78,10 @@ typedef enum {
 #define SPAPR_CAP_LARGE_DECREMENTER     0x08
 /* Count Cache Flush Assist HW Instruction */
 #define SPAPR_CAP_CCF_ASSIST            0x09
+/* FWNMI machine check handling */
+#define SPAPR_CAP_FWNMI_MCE             0x0A
 /* Num Caps */
-#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
+#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI_MCE + 1)
 
 /*
  * Capability Values
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 3bf0a46..afef4cd 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -84,6 +84,7 @@ static int cap_ppc_safe_indirect_branch;
 static int cap_ppc_count_cache_flush_assist;
 static int cap_ppc_nested_kvm_hv;
 static int cap_large_decr;
+static int cap_ppc_fwnmi;
 
 static uint32_t debug_inst_opcode;
 
@@ -152,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     kvmppc_get_cpu_characteristics(s);
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
     cap_large_decr = kvmppc_get_dec_bits();
+    cap_ppc_fwnmi = kvm_check_extension(s, KVM_CAP_PPC_FWNMI);
     /*
      * Note: setting it to false because there is not such capability
      * in KVM at this moment.
@@ -2114,6 +2116,18 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
     }
 }
 
+int kvmppc_fwnmi_enable(PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+
+    if (!cap_ppc_fwnmi) {
+        return 1;
+    }
+
+    return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
+
 int kvmppc_smt_threads(void)
 {
     return cap_ppc_smt ? cap_ppc_smt : 1;
@@ -2414,6 +2428,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
     return cap_mmu_hash_v3;
 }
 
+bool kvmppc_has_cap_ppc_fwnmi(void)
+{
+    return cap_ppc_fwnmi;
+}
+
 static bool kvmppc_power8_host(void)
 {
     bool ret = false;
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 45776ca..880cee9 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -27,6 +27,8 @@ void kvmppc_enable_h_page_init(void);
 void kvmppc_set_papr(PowerPCCPU *cpu);
 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
+int kvmppc_fwnmi_enable(PowerPCCPU *cpu);
+bool kvmppc_has_cap_ppc_fwnmi(void);
 int kvmppc_smt_threads(void);
 void kvmppc_hint_smt_possible(Error **errp);
 int kvmppc_set_smt_threads(int smt);
@@ -158,6 +160,16 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
 {
 }
 
+static inline int kvmppc_fwnmi_enable(PowerPCCPU *cpu)
+{
+    return 1;
+}
+
+static inline bool kvmppc_has_cap_ppc_fwnmi(void)
+{
+    return false;
+}
+
 static inline int kvmppc_smt_threads(void)
 {
     return 1;

Re: [Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by David Gibson 6 years, 7 months ago

On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
> the KVM causes guest exit with NMI as exit reason
> when it encounters a machine check exception on the
> address belonging to a guest. Without this capability
> enabled, KVM redirects machine check exceptions to
> guest's 0x200 vector.
> 
> This patch also introduces fwnmi-mce capability to
> deal with the case when a guest with the
> KVM_CAP_PPC_FWNMI capability enabled is attempted
> to migrate to a host that does not support this
> capability.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         |    1 +
>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    4 +++-
>  target/ppc/kvm.c       |   19 +++++++++++++++++++
>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
>  5 files changed, 61 insertions(+), 1 deletion(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 6dd8aaa..2ef86aa 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>      spapr_caps_add_properties(smc, &error_abort);
>      smc->irq = &spapr_irq_dual;
>      smc->dr_phb_enabled = true;
> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> index 31b4661..2e92eb6 100644
> --- a/hw/ppc/spapr_caps.c
> +++ b/hw/ppc/spapr_caps.c
> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>      }
>  }
>  
> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> +                                Error **errp)
> +{
> +    if (!val) {
> +        return; /* Disabled by default */
> +    }
> +
> +    if (tcg_enabled()) {
> +        error_setg(errp,
> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");

Not allowing this for TCG creates an awkward incompatibility between
KVM and TCG guests.  I can't actually see any reason to ban it for TCG
- with the current code TCG won't ever generate NMIs, but I don't see
that anything will actually break.

In fact, we do have an nmi monitor command, currently wired to the
spapr_nmi() function which resets each cpu, but it probably makes
sense to wire it up to the fwnmi stuff when present.

> +    } else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
> +        error_setg(errp,
> +"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try cap-fwnmi-mce=off");
> +    }
> +}
> +
>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>      [SPAPR_CAP_HTM] = {
>          .name = "htm",
> @@ -578,6 +594,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>          .type = "bool",
>          .apply = cap_ccf_assist_apply,
>      },
> +    [SPAPR_CAP_FWNMI_MCE] = {
> +        .name = "fwnmi-mce",
> +        .description = "Handle fwnmi machine check exceptions",
> +        .index = SPAPR_CAP_FWNMI_MCE,
> +        .get = spapr_cap_get_bool,
> +        .set = spapr_cap_set_bool,
> +        .type = "bool",
> +        .apply = cap_fwnmi_mce_apply,
> +    },
>  };
>  
>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
> @@ -717,6 +742,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
> +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
>  
>  void spapr_caps_init(SpaprMachineState *spapr)
>  {
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 4f5becf..f891f8f 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -78,8 +78,10 @@ typedef enum {
>  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
>  /* Count Cache Flush Assist HW Instruction */
>  #define SPAPR_CAP_CCF_ASSIST            0x09
> +/* FWNMI machine check handling */
> +#define SPAPR_CAP_FWNMI_MCE             0x0A
>  /* Num Caps */
> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI_MCE + 1)
>  
>  /*
>   * Capability Values
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 3bf0a46..afef4cd 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -84,6 +84,7 @@ static int cap_ppc_safe_indirect_branch;
>  static int cap_ppc_count_cache_flush_assist;
>  static int cap_ppc_nested_kvm_hv;
>  static int cap_large_decr;
> +static int cap_ppc_fwnmi;
>  
>  static uint32_t debug_inst_opcode;
>  
> @@ -152,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>      kvmppc_get_cpu_characteristics(s);
>      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
>      cap_large_decr = kvmppc_get_dec_bits();
> +    cap_ppc_fwnmi = kvm_check_extension(s, KVM_CAP_PPC_FWNMI);
>      /*
>       * Note: setting it to false because there is not such capability
>       * in KVM at this moment.
> @@ -2114,6 +2116,18 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>      }
>  }
>  
> +int kvmppc_fwnmi_enable(PowerPCCPU *cpu)
> +{
> +    CPUState *cs = CPU(cpu);
> +
> +    if (!cap_ppc_fwnmi) {
> +        return 1;
> +    }
> +
> +    return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
> +}
> +
> +
>  int kvmppc_smt_threads(void)
>  {
>      return cap_ppc_smt ? cap_ppc_smt : 1;
> @@ -2414,6 +2428,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
>      return cap_mmu_hash_v3;
>  }
>  
> +bool kvmppc_has_cap_ppc_fwnmi(void)
> +{
> +    return cap_ppc_fwnmi;
> +}
> +
>  static bool kvmppc_power8_host(void)
>  {
>      bool ret = false;
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index 45776ca..880cee9 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -27,6 +27,8 @@ void kvmppc_enable_h_page_init(void);
>  void kvmppc_set_papr(PowerPCCPU *cpu);
>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
> +int kvmppc_fwnmi_enable(PowerPCCPU *cpu);
> +bool kvmppc_has_cap_ppc_fwnmi(void);
>  int kvmppc_smt_threads(void);
>  void kvmppc_hint_smt_possible(Error **errp);
>  int kvmppc_set_smt_threads(int smt);
> @@ -158,6 +160,16 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>  {
>  }
>  
> +static inline int kvmppc_fwnmi_enable(PowerPCCPU *cpu)
> +{
> +    return 1;
> +}
> +
> +static inline bool kvmppc_has_cap_ppc_fwnmi(void)
> +{
> +    return false;
> +}
> +
>  static inline int kvmppc_smt_threads(void)
>  {
>      return 1;
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by Aravinda Prasad 6 years, 7 months ago


On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
> On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
>> the KVM causes guest exit with NMI as exit reason
>> when it encounters a machine check exception on the
>> address belonging to a guest. Without this capability
>> enabled, KVM redirects machine check exceptions to
>> guest's 0x200 vector.
>>
>> This patch also introduces fwnmi-mce capability to
>> deal with the case when a guest with the
>> KVM_CAP_PPC_FWNMI capability enabled is attempted
>> to migrate to a host that does not support this
>> capability.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr.c         |    1 +
>>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    4 +++-
>>  target/ppc/kvm.c       |   19 +++++++++++++++++++
>>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
>>  5 files changed, 61 insertions(+), 1 deletion(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 6dd8aaa..2ef86aa 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>      spapr_caps_add_properties(smc, &error_abort);
>>      smc->irq = &spapr_irq_dual;
>>      smc->dr_phb_enabled = true;
>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>> index 31b4661..2e92eb6 100644
>> --- a/hw/ppc/spapr_caps.c
>> +++ b/hw/ppc/spapr_caps.c
>> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>>      }
>>  }
>>  
>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
>> +                                Error **errp)
>> +{
>> +    if (!val) {
>> +        return; /* Disabled by default */
>> +    }
>> +
>> +    if (tcg_enabled()) {
>> +        error_setg(errp,
>> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
> 
> Not allowing this for TCG creates an awkward incompatibility between
> KVM and TCG guests.  I can't actually see any reason to ban it for TCG
> - with the current code TCG won't ever generate NMIs, but I don't see
> that anything will actually break.
> 
> In fact, we do have an nmi monitor command, currently wired to the
> spapr_nmi() function which resets each cpu, but it probably makes
> sense to wire it up to the fwnmi stuff when present.

Yes, but that nmi support is not enough to inject a synchronous error
into the guest kernel. For example, we should provide the faulty address
along with other information such as the type of error (SLB multi-hit,
memory error, TLB multi-hit), when the error occurred (load/store),
and whether the error was completely recovered or not. Without such
information we cannot build the error log and pass it on to the guest
kernel. Right now the nmi monitor command takes the CPU number as its only argument.

So I think TCG support should be a separate patch by itself.

Regards,
Aravinda

> 
>> +    } else if (kvm_enabled() && !kvmppc_has_cap_ppc_fwnmi()) {
>> +        error_setg(errp,
>> +"Firmware Assisted Non-Maskable Interrupts not supported by KVM, try cap-fwnmi-mce=off");
>> +    }
>> +}
>> +
>>  SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>      [SPAPR_CAP_HTM] = {
>>          .name = "htm",
>> @@ -578,6 +594,15 @@ SpaprCapabilityInfo capability_table[SPAPR_CAP_NUM] = {
>>          .type = "bool",
>>          .apply = cap_ccf_assist_apply,
>>      },
>> +    [SPAPR_CAP_FWNMI_MCE] = {
>> +        .name = "fwnmi-mce",
>> +        .description = "Handle fwnmi machine check exceptions",
>> +        .index = SPAPR_CAP_FWNMI_MCE,
>> +        .get = spapr_cap_get_bool,
>> +        .set = spapr_cap_set_bool,
>> +        .type = "bool",
>> +        .apply = cap_fwnmi_mce_apply,
>> +    },
>>  };
>>  
>>  static SpaprCapabilities default_caps_with_cpu(SpaprMachineState *spapr,
>> @@ -717,6 +742,7 @@ SPAPR_CAP_MIG_STATE(hpt_maxpagesize, SPAPR_CAP_HPT_MAXPAGESIZE);
>>  SPAPR_CAP_MIG_STATE(nested_kvm_hv, SPAPR_CAP_NESTED_KVM_HV);
>>  SPAPR_CAP_MIG_STATE(large_decr, SPAPR_CAP_LARGE_DECREMENTER);
>>  SPAPR_CAP_MIG_STATE(ccf_assist, SPAPR_CAP_CCF_ASSIST);
>> +SPAPR_CAP_MIG_STATE(fwnmi, SPAPR_CAP_FWNMI_MCE);
>>  
>>  void spapr_caps_init(SpaprMachineState *spapr)
>>  {
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 4f5becf..f891f8f 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -78,8 +78,10 @@ typedef enum {
>>  #define SPAPR_CAP_LARGE_DECREMENTER     0x08
>>  /* Count Cache Flush Assist HW Instruction */
>>  #define SPAPR_CAP_CCF_ASSIST            0x09
>> +/* FWNMI machine check handling */
>> +#define SPAPR_CAP_FWNMI_MCE             0x0A
>>  /* Num Caps */
>> -#define SPAPR_CAP_NUM                   (SPAPR_CAP_CCF_ASSIST + 1)
>> +#define SPAPR_CAP_NUM                   (SPAPR_CAP_FWNMI_MCE + 1)
>>  
>>  /*
>>   * Capability Values
>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>> index 3bf0a46..afef4cd 100644
>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -84,6 +84,7 @@ static int cap_ppc_safe_indirect_branch;
>>  static int cap_ppc_count_cache_flush_assist;
>>  static int cap_ppc_nested_kvm_hv;
>>  static int cap_large_decr;
>> +static int cap_ppc_fwnmi;
>>  
>>  static uint32_t debug_inst_opcode;
>>  
>> @@ -152,6 +153,7 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
>>      kvmppc_get_cpu_characteristics(s);
>>      cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
>>      cap_large_decr = kvmppc_get_dec_bits();
>> +    cap_ppc_fwnmi = kvm_check_extension(s, KVM_CAP_PPC_FWNMI);
>>      /*
>>       * Note: setting it to false because there is not such capability
>>       * in KVM at this moment.
>> @@ -2114,6 +2116,18 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>>      }
>>  }
>>  
>> +int kvmppc_fwnmi_enable(PowerPCCPU *cpu)
>> +{
>> +    CPUState *cs = CPU(cpu);
>> +
>> +    if (!cap_ppc_fwnmi) {
>> +        return 1;
>> +    }
>> +
>> +    return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
>> +}
>> +
>> +
>>  int kvmppc_smt_threads(void)
>>  {
>>      return cap_ppc_smt ? cap_ppc_smt : 1;
>> @@ -2414,6 +2428,11 @@ bool kvmppc_has_cap_mmu_hash_v3(void)
>>      return cap_mmu_hash_v3;
>>  }
>>  
>> +bool kvmppc_has_cap_ppc_fwnmi(void)
>> +{
>> +    return cap_ppc_fwnmi;
>> +}
>> +
>>  static bool kvmppc_power8_host(void)
>>  {
>>      bool ret = false;
>> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
>> index 45776ca..880cee9 100644
>> --- a/target/ppc/kvm_ppc.h
>> +++ b/target/ppc/kvm_ppc.h
>> @@ -27,6 +27,8 @@ void kvmppc_enable_h_page_init(void);
>>  void kvmppc_set_papr(PowerPCCPU *cpu);
>>  int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr);
>>  void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy);
>> +int kvmppc_fwnmi_enable(PowerPCCPU *cpu);
>> +bool kvmppc_has_cap_ppc_fwnmi(void);
>>  int kvmppc_smt_threads(void);
>>  void kvmppc_hint_smt_possible(Error **errp);
>>  int kvmppc_set_smt_threads(int smt);
>> @@ -158,6 +160,16 @@ static inline void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
>>  {
>>  }
>>  
>> +static inline int kvmppc_fwnmi_enable(PowerPCCPU *cpu)
>> +{
>> +    return 1;
>> +}
>> +
>> +static inline bool kvmppc_has_cap_ppc_fwnmi(void)
>> +{
>> +    return false;
>> +}
>> +
>>  static inline int kvmppc_smt_threads(void)
>>  {
>>      return 1;
>>
> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by David Gibson 6 years, 7 months ago

On Tue, Jul 02, 2019 at 11:54:26AM +0530, Aravinda Prasad wrote:
> 
> 
> On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
> > On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
> >> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
> >> the KVM causes guest exit with NMI as exit reason
> >> when it encounters a machine check exception on the
> >> address belonging to a guest. Without this capability
> >> enabled, KVM redirects machine check exceptions to
> >> guest's 0x200 vector.
> >>
> >> This patch also introduces fwnmi-mce capability to
> >> deal with the case when a guest with the
> >> KVM_CAP_PPC_FWNMI capability enabled is attempted
> >> to migrate to a host that does not support this
> >> capability.
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr.c         |    1 +
> >>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    4 +++-
> >>  target/ppc/kvm.c       |   19 +++++++++++++++++++
> >>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
> >>  5 files changed, 61 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 6dd8aaa..2ef86aa 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>      spapr_caps_add_properties(smc, &error_abort);
> >>      smc->irq = &spapr_irq_dual;
> >>      smc->dr_phb_enabled = true;
> >> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> >> index 31b4661..2e92eb6 100644
> >> --- a/hw/ppc/spapr_caps.c
> >> +++ b/hw/ppc/spapr_caps.c
> >> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> >>      }
> >>  }
> >>  
> >> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> >> +                                Error **errp)
> >> +{
> >> +    if (!val) {
> >> +        return; /* Disabled by default */
> >> +    }
> >> +
> >> +    if (tcg_enabled()) {
> >> +        error_setg(errp,
> >> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
> > 
> > Not allowing this for TCG creates an awkward incompatibility between
> > KVM and TCG guests.  I can't actually see any reason to ban it for TCG
> > - with the current code TCG won't ever generate NMIs, but I don't see
> > that anything will actually break.
> > 
> > In fact, we do have an nmi monitor command, currently wired to the
> > spapr_nmi() function which resets each cpu, but it probably makes
> > sense to wire it up to the fwnmi stuff when present.
> 
> Yes, but that nmi support is not enough to inject a synchronous error
> into the guest kernel. For example, we should provide the faulty address
> along with other information such as the type of error (slb multi-hit,
> memory error, TLB multi-hit) and when the error occurred (load/store)
> and whether the error was completely recovered or not. Without such
> information we cannot build the error log and pass it on to the guest
> kernel. Right now nmi monitor command takes cpu number as the only argument.

Obviously we can't inject an arbitrary MCE event with that monitor
command.  But isn't there some sort of catch-all / unknown type of MCE
event which we could inject?

It seems very confusing to me to have 2 totally separate "nmi"
mechanisms.

> So I think TCG support should be a separate patch by itself.

Even if we don't wire up the monitor command, I still don't see
anything that this patch breaks - we can support the nmi-register and
nmi-interlock calls without ever actually creating MCE events.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by Aravinda Prasad 6 years, 7 months ago


On Wednesday 03 July 2019 08:33 AM, David Gibson wrote:
> On Tue, Jul 02, 2019 at 11:54:26AM +0530, Aravinda Prasad wrote:
>>
>>
>> On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
>>> On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
>>>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
>>>> the KVM causes guest exit with NMI as exit reason
>>>> when it encounters a machine check exception on the
>>>> address belonging to a guest. Without this capability
>>>> enabled, KVM redirects machine check exceptions to
>>>> guest's 0x200 vector.
>>>>
>>>> This patch also introduces fwnmi-mce capability to
>>>> deal with the case when a guest with the
>>>> KVM_CAP_PPC_FWNMI capability enabled is attempted
>>>> to migrate to a host that does not support this
>>>> capability.
>>>>
>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>> ---
>>>>  hw/ppc/spapr.c         |    1 +
>>>>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
>>>>  include/hw/ppc/spapr.h |    4 +++-
>>>>  target/ppc/kvm.c       |   19 +++++++++++++++++++
>>>>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
>>>>  5 files changed, 61 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index 6dd8aaa..2ef86aa 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>      smc->irq = &spapr_irq_dual;
>>>>      smc->dr_phb_enabled = true;
>>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>>>> index 31b4661..2e92eb6 100644
>>>> --- a/hw/ppc/spapr_caps.c
>>>> +++ b/hw/ppc/spapr_caps.c
>>>> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>>>>      }
>>>>  }
>>>>  
>>>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
>>>> +                                Error **errp)
>>>> +{
>>>> +    if (!val) {
>>>> +        return; /* Disabled by default */
>>>> +    }
>>>> +
>>>> +    if (tcg_enabled()) {
>>>> +        error_setg(errp,
>>>> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
>>>
>>> Not allowing this for TCG creates an awkward incompatibility between
>>> KVM and TCG guests.  I can't actually see any reason to ban it for TCG
>>> - with the current code TCG won't ever generate NMIs, but I don't see
>>> that anything will actually break.
>>>
>>> In fact, we do have an nmi monitor command, currently wired to the
>>> spapr_nmi() function which resets each cpu, but it probably makes
>>> sense to wire it up to the fwnmi stuff when present.
>>
>> Yes, but that nmi support is not enough to inject a synchronous error
>> into the guest kernel. For example, we should provide the faulty address
>> along with other information such as the type of error (slb multi-hit,
>> memory error, TLB multi-hit) and when the error occurred (load/store)
>> and whether the error was completely recovered or not. Without such
>> information we cannot build the error log and pass it on to the guest
>> kernel. Right now nmi monitor command takes cpu number as the only argument.
> 
> Obviously we can't inject an arbitrary MCE event with that monitor
> command.  But isn't there some sort of catch-all / unknown type of MCE
> event which we could inject?

We have an "unknown" type of error, but we should also pass an address in
the MCE event log. Strictly speaking, this address should be a valid
address in the current CPU context, as MCEs are synchronous errors
triggered when we touch a bad address.

We can pass a default address with every nmi, but I am not sure whether
that will be practically helpful.

> 
> It seems very confusing to me to have 2 totally separate "nmi"
> mechanisms.
> 
>> So I think TCG support should be a separate patch by itself.
> 
> Even if we don't wire up the monitor command, I still don't see
> anything that this patch breaks - we can support the nmi-register and
> nmi-interlock calls without ever actually creating MCE events.

If we support the nmi-register and nmi-interlock calls without wiring up
the monitor command, then we will be falsely claiming NMI support to the
guest when it is not actually supported.

Regards,
Aravinda



> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by David Gibson 6 years, 7 months ago

On Wed, Jul 03, 2019 at 02:58:24PM +0530, Aravinda Prasad wrote:
> 
> 
> On Wednesday 03 July 2019 08:33 AM, David Gibson wrote:
> > On Tue, Jul 02, 2019 at 11:54:26AM +0530, Aravinda Prasad wrote:
> >>
> >>
> >> On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
> >>> On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
> >>>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
> >>>> the KVM causes guest exit with NMI as exit reason
> >>>> when it encounters a machine check exception on the
> >>>> address belonging to a guest. Without this capability
> >>>> enabled, KVM redirects machine check exceptions to
> >>>> guest's 0x200 vector.
> >>>>
> >>>> This patch also introduces fwnmi-mce capability to
> >>>> deal with the case when a guest with the
> >>>> KVM_CAP_PPC_FWNMI capability enabled is attempted
> >>>> to migrate to a host that does not support this
> >>>> capability.
> >>>>
> >>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>> ---
> >>>>  hw/ppc/spapr.c         |    1 +
> >>>>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
> >>>>  include/hw/ppc/spapr.h |    4 +++-
> >>>>  target/ppc/kvm.c       |   19 +++++++++++++++++++
> >>>>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
> >>>>  5 files changed, 61 insertions(+), 1 deletion(-)
> >>>>
> >>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>> index 6dd8aaa..2ef86aa 100644
> >>>> --- a/hw/ppc/spapr.c
> >>>> +++ b/hw/ppc/spapr.c
> >>>> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>      spapr_caps_add_properties(smc, &error_abort);
> >>>>      smc->irq = &spapr_irq_dual;
> >>>>      smc->dr_phb_enabled = true;
> >>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> >>>> index 31b4661..2e92eb6 100644
> >>>> --- a/hw/ppc/spapr_caps.c
> >>>> +++ b/hw/ppc/spapr_caps.c
> >>>> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> >>>>      }
> >>>>  }
> >>>>  
> >>>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> >>>> +                                Error **errp)
> >>>> +{
> >>>> +    if (!val) {
> >>>> +        return; /* Disabled by default */
> >>>> +    }
> >>>> +
> >>>> +    if (tcg_enabled()) {
> >>>> +        error_setg(errp,
> >>>> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
> >>>
> >>> Not allowing this for TCG creates an awkward incompatibility between
> >>> KVM and TCG guests.  I can't actually see any reason to ban it for TCG
> >>> - with the current code TCG won't ever generate NMIs, but I don't see
> >>> that anything will actually break.
> >>>
> >>> In fact, we do have an nmi monitor command, currently wired to the
> >>> spapr_nmi() function which resets each cpu, but it probably makes
> >>> sense to wire it up to the fwnmi stuff when present.
> >>
> >> Yes, but that nmi support is not enough to inject a synchronous error
> >> into the guest kernel. For example, we should provide the faulty address
> >> along with other information such as the type of error (slb multi-hit,
> >> memory error, TLB multi-hit) and when the error occurred (load/store)
> >> and whether the error was completely recovered or not. Without such
> >> information we cannot build the error log and pass it on to the guest
> >> kernel. Right now nmi monitor command takes cpu number as the only argument.
> > 
> > Obviously we can't inject an arbitrary MCE event with that monitor
> > command.  But isn't there some sort of catch-all / unknown type of MCE
> > event which we could inject?
> 
> We have "unknown" type of error, but we should also pass an address in
> the MCE event log. Strictly speaking this address should be a valid
> address in the current CPU context as MCEs are synchronous errors
> triggered when we touch a bad address.

Well, some of them are.  At least historically both synchronous and
asynchronous MCEs were possible.  Are there really no versions where
you can report an MCE with unknown address?

> We can pass a default address with every nmi, but I am not sure whether
> that will be practically helpful.
> 
> > It seems very confusing to me to have 2 totally separate "nmi"
> > mechanisms.
> > 
> >> So I think TCG support should be a separate patch by itself.
> > 
> > Even if we don't wire up the monitor command, I still don't see
> > anything that this patch breaks - we can support the nmi-register and
> > nmi-interlock calls without ever actually creating MCE events.
> 
> If we support nmi-register and nmi-interlock calls without the monitor
> command wire-up then we will be falsely claiming the nmi support to the
> guest while it is not actually supported.

How so?  AFAICT, from the point of view of the guest this is not
observably different from supporting the NMI mechanism but NMIs never
occurring.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by Aravinda Prasad 6 years, 7 months ago


On Thursday 04 July 2019 06:37 AM, David Gibson wrote:
> On Wed, Jul 03, 2019 at 02:58:24PM +0530, Aravinda Prasad wrote:
>>
>>
>> On Wednesday 03 July 2019 08:33 AM, David Gibson wrote:
>>> On Tue, Jul 02, 2019 at 11:54:26AM +0530, Aravinda Prasad wrote:
>>>>
>>>>
>>>> On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
>>>>> On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
>>>>>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
>>>>>> the KVM causes guest exit with NMI as exit reason
>>>>>> when it encounters a machine check exception on the
>>>>>> address belonging to a guest. Without this capability
>>>>>> enabled, KVM redirects machine check exceptions to
>>>>>> guest's 0x200 vector.
>>>>>>
>>>>>> This patch also introduces fwnmi-mce capability to
>>>>>> deal with the case when a guest with the
>>>>>> KVM_CAP_PPC_FWNMI capability enabled is attempted
>>>>>> to migrate to a host that does not support this
>>>>>> capability.
>>>>>>
>>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>>>> ---
>>>>>>  hw/ppc/spapr.c         |    1 +
>>>>>>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
>>>>>>  include/hw/ppc/spapr.h |    4 +++-
>>>>>>  target/ppc/kvm.c       |   19 +++++++++++++++++++
>>>>>>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
>>>>>>  5 files changed, 61 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>>>> index 6dd8aaa..2ef86aa 100644
>>>>>> --- a/hw/ppc/spapr.c
>>>>>> +++ b/hw/ppc/spapr.c
>>>>>> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>>>      smc->irq = &spapr_irq_dual;
>>>>>>      smc->dr_phb_enabled = true;
>>>>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>>>>>> index 31b4661..2e92eb6 100644
>>>>>> --- a/hw/ppc/spapr_caps.c
>>>>>> +++ b/hw/ppc/spapr_caps.c
>>>>>> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>>>>>>      }
>>>>>>  }
>>>>>>  
>>>>>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
>>>>>> +                                Error **errp)
>>>>>> +{
>>>>>> +    if (!val) {
>>>>>> +        return; /* Disabled by default */
>>>>>> +    }
>>>>>> +
>>>>>> +    if (tcg_enabled()) {
>>>>>> +        error_setg(errp,
>>>>>> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
>>>>>
>>>>> Not allowing this for TCG creates an awkward incompatibility between
>>>>> KVM and TCG guests.  I can't actually see any reason to ban it for TCG
>>>>> - with the current code TCG won't ever generate NMIs, but I don't see
>>>>> that anything will actually break.
>>>>>
>>>>> In fact, we do have an nmi monitor command, currently wired to the
>>>>> spapr_nmi() function which resets each cpu, but it probably makes
>>>>> sense to wire it up to the fwnmi stuff when present.
>>>>
>>>> Yes, but that nmi support is not enough to inject a synchronous error
>>>> into the guest kernel. For example, we should provide the faulty address
>>>> along with other information such as the type of error (slb multi-hit,
>>>> memory error, TLB multi-hit) and when the error occurred (load/store)
>>>> and whether the error was completely recovered or not. Without such
>>>> information we cannot build the error log and pass it on to the guest
>>>> kernel. Right now nmi monitor command takes cpu number as the only argument.
>>>
>>> Obviously we can't inject an arbitrary MCE event with that monitor
>>> command.  But isn't there some sort of catch-all / unknown type of MCE
>>> event which we could inject?
>>
>> We have "unknown" type of error, but we should also pass an address in
>> the MCE event log. Strictly speaking this address should be a valid
>> address in the current CPU context as MCEs are synchronous errors
>> triggered when we touch a bad address.
> 
> Well, some of them are.  At least historically both synchronous and
> asynchronous MCEs were possible.  Are there really no versions where
> you can report an MCE with unknown address?

I am not aware of any such versions. Will cross check.

> 
>> We can pass a default address with every nmi, but I am not sure whether
>> that will be practically helpful.
>>
>>> It seems very confusing to me to have 2 totally separate "nmi"
>>> mechanisms.
>>>
>>>> So I think TCG support should be a separate patch by itself.
>>>
>>> Even if we don't wire up the monitor command, I still don't see
>>> anything that this patch breaks - we can support the nmi-register and
>>> nmi-interlock calls without ever actually creating MCE events.
>>
>> If we support nmi-register and nmi-interlock calls without the monitor
>> command wire-up then we will be falsely claiming the nmi support to the
>> guest while it is not actually supported.
> 
> How so?  AFAICT, from the point of view of the guest this is not
> observably different from supporting the NMI mechanism but NMIs never
> occurring.

A guest inserting a duplicate SLB entry will expect the machine check
exception to be delivered to the handler registered via ibm,nmi-register.
But we actually don't do that in TCG.

> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by David Gibson 6 years, 7 months ago

On Thu, Jul 04, 2019 at 10:33:11AM +0530, Aravinda Prasad wrote:
> 
> 
> On Thursday 04 July 2019 06:37 AM, David Gibson wrote:
> > On Wed, Jul 03, 2019 at 02:58:24PM +0530, Aravinda Prasad wrote:
> >>
> >>
> >> On Wednesday 03 July 2019 08:33 AM, David Gibson wrote:
> >>> On Tue, Jul 02, 2019 at 11:54:26AM +0530, Aravinda Prasad wrote:
> >>>>
> >>>>
> >>>> On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
> >>>>> On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
> >>>>>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
> >>>>>> the KVM causes guest exit with NMI as exit reason
> >>>>>> when it encounters a machine check exception on the
> >>>>>> address belonging to a guest. Without this capability
> >>>>>> enabled, KVM redirects machine check exceptions to
> >>>>>> guest's 0x200 vector.
> >>>>>>
> >>>>>> This patch also introduces fwnmi-mce capability to
> >>>>>> deal with the case when a guest with the
> >>>>>> KVM_CAP_PPC_FWNMI capability enabled is attempted
> >>>>>> to migrate to a host that does not support this
> >>>>>> capability.
> >>>>>>
> >>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>>>> ---
> >>>>>>  hw/ppc/spapr.c         |    1 +
> >>>>>>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
> >>>>>>  include/hw/ppc/spapr.h |    4 +++-
> >>>>>>  target/ppc/kvm.c       |   19 +++++++++++++++++++
> >>>>>>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
> >>>>>>  5 files changed, 61 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>>>> index 6dd8aaa..2ef86aa 100644
> >>>>>> --- a/hw/ppc/spapr.c
> >>>>>> +++ b/hw/ppc/spapr.c
> >>>>>> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>>>      spapr_caps_add_properties(smc, &error_abort);
> >>>>>>      smc->irq = &spapr_irq_dual;
> >>>>>>      smc->dr_phb_enabled = true;
> >>>>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> >>>>>> index 31b4661..2e92eb6 100644
> >>>>>> --- a/hw/ppc/spapr_caps.c
> >>>>>> +++ b/hw/ppc/spapr_caps.c
> >>>>>> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> >>>>>>      }
> >>>>>>  }
> >>>>>>  
> >>>>>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> >>>>>> +                                Error **errp)
> >>>>>> +{
> >>>>>> +    if (!val) {
> >>>>>> +        return; /* Disabled by default */
> >>>>>> +    }
> >>>>>> +
> >>>>>> +    if (tcg_enabled()) {
> >>>>>> +        error_setg(errp,
> >>>>>> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
> >>>>>
> >>>>> Not allowing this for TCG creates an awkward incompatibility between
> >>>>> KVM and TCG guests.  I can't actually see any reason to ban it for TCG
> >>>>> - with the current code TCG won't ever generate NMIs, but I don't see
> >>>>> that anything will actually break.
> >>>>>
> >>>>> In fact, we do have an nmi monitor command, currently wired to the
> >>>>> spapr_nmi() function which resets each cpu, but it probably makes
> >>>>> sense to wire it up to the fwnmi stuff when present.
> >>>>
> >>>> Yes, but that nmi support is not enough to inject a synchronous error
> >>>> into the guest kernel. For example, we should provide the faulty address
> >>>> along with other information such as the type of error (slb multi-hit,
> >>>> memory error, TLB multi-hit) and when the error occurred (load/store)
> >>>> and whether the error was completely recovered or not. Without such
> >>>> information we cannot build the error log and pass it on to the guest
> >>>> kernel. Right now nmi monitor command takes cpu number as the only argument.
> >>>
> >>> Obviously we can't inject an arbitrary MCE event with that monitor
> >>> command.  But isn't there some sort of catch-all / unknown type of MCE
> >>> event which we could inject?
> >>
> >> We have "unknown" type of error, but we should also pass an address in
> >> the MCE event log. Strictly speaking this address should be a valid
> >> address in the current CPU context as MCEs are synchronous errors
> >> triggered when we touch a bad address.
> > 
> > Well, some of them are.  At least historically both synchronous and
> > asynchronous MCEs were possible.  Are there really no versions where
> > you can report an MCE with unknown address?
> 
> I am not aware of any such versions. Will cross check.
> 
> > 
> >> We can pass a default address with every nmi, but I am not sure whether
> >> that will be practically helpful.
> >>
> >>> It seems very confusing to me to have 2 totally separate "nmi"
> >>> mechanisms.
> >>>
> >>>> So I think TCG support should be a separate patch by itself.
> >>>
> >>> Even if we don't wire up the monitor command, I still don't see
> >>> anything that this patch breaks - we can support the nmi-register and
> >>> nmi-interlock calls without ever actually creating MCE events.
> >>
> >> If we support nmi-register and nmi-interlock calls without the monitor
> >> command wire-up then we will be falsely claiming the nmi support to the
> >> guest while it is not actually supported.
> > 
> > How so?  AFAICT, from the point of view of the guest this is not
> > observably different from supporting the NMI mechanism but NMIs never
> > occurring.
> 
> A guest inserting a duplicate SLB will expect the machine check
> exception delivered to the handler registered via nmi,register.
> But we actually don't do that in TCG.

Ah, true, I was thinking of external hardware fault triggered MCEs
rather than software error ones like duplicate SLB.

That said, I strongly suspect TCG is buggy enough at present that
exact behaviour in rare error conditions like duplicate SLB is not
really a big problem in the scheme of things.

I really don't think we can enable this by default until we allow it
for TCG - we don't want starting a TCG guest to involve manually
switching other options.

We could consider allowing it for TCG but just printing a warning that
the behaviour may not be correct in some conditions - we do something
similar for some of the Spectre workarounds already.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by Aravinda Prasad 6 years, 7 months ago


On Friday 05 July 2019 06:37 AM, David Gibson wrote:
> On Thu, Jul 04, 2019 at 10:33:11AM +0530, Aravinda Prasad wrote:
>>
>>
>> On Thursday 04 July 2019 06:37 AM, David Gibson wrote:
>>> On Wed, Jul 03, 2019 at 02:58:24PM +0530, Aravinda Prasad wrote:
>>>>
>>>>
>>>> On Wednesday 03 July 2019 08:33 AM, David Gibson wrote:
>>>>> On Tue, Jul 02, 2019 at 11:54:26AM +0530, Aravinda Prasad wrote:
>>>>>>
>>>>>>
>>>>>> On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
>>>>>>> On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
>>>>>>>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
>>>>>>>> the KVM causes guest exit with NMI as exit reason
>>>>>>>> when it encounters a machine check exception on the
>>>>>>>> address belonging to a guest. Without this capability
>>>>>>>> enabled, KVM redirects machine check exceptions to
>>>>>>>> guest's 0x200 vector.
>>>>>>>>
>>>>>>>> This patch also introduces fwnmi-mce capability to
>>>>>>>> deal with the case when a guest with the
>>>>>>>> KVM_CAP_PPC_FWNMI capability enabled is attempted
>>>>>>>> to migrate to a host that does not support this
>>>>>>>> capability.
>>>>>>>>
>>>>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>>>>>> ---
>>>>>>>>  hw/ppc/spapr.c         |    1 +
>>>>>>>>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
>>>>>>>>  include/hw/ppc/spapr.h |    4 +++-
>>>>>>>>  target/ppc/kvm.c       |   19 +++++++++++++++++++
>>>>>>>>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
>>>>>>>>  5 files changed, 61 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>>>>>> index 6dd8aaa..2ef86aa 100644
>>>>>>>> --- a/hw/ppc/spapr.c
>>>>>>>> +++ b/hw/ppc/spapr.c
>>>>>>>> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>>>>>      smc->irq = &spapr_irq_dual;
>>>>>>>>      smc->dr_phb_enabled = true;
>>>>>>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>>>>>>>> index 31b4661..2e92eb6 100644
>>>>>>>> --- a/hw/ppc/spapr_caps.c
>>>>>>>> +++ b/hw/ppc/spapr_caps.c
>>>>>>>> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>>>>>>>>      }
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
>>>>>>>> +                                Error **errp)
>>>>>>>> +{
>>>>>>>> +    if (!val) {
>>>>>>>> +        return; /* Disabled by default */
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (tcg_enabled()) {
>>>>>>>> +        error_setg(errp,
>>>>>>>> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
>>>>>>>
>>>>>>> Not allowing this for TCG creates an awkward incompatibility between
>>>>>>> KVM and TCG guests.  I can't actually see any reason to ban it for TCG
>>>>>>> - with the current code TCG won't ever generate NMIs, but I don't see
>>>>>>> that anything will actually break.
>>>>>>>
>>>>>>> In fact, we do have an nmi monitor command, currently wired to the
>>>>>>> spapr_nmi() function which resets each cpu, but it probably makes
>>>>>>> sense to wire it up to the fwnmi stuff when present.
>>>>>>
>>>>>> Yes, but that nmi support is not enough to inject a synchronous error
>>>>>> into the guest kernel. For example, we should provide the faulty address
>>>>>> along with other information such as the type of error (slb multi-hit,
>>>>>> memory error, TLB multi-hit) and when the error occurred (load/store)
>>>>>> and whether the error was completely recovered or not. Without such
>>>>>> information we cannot build the error log and pass it on to the guest
>>>>>> kernel. Right now nmi monitor command takes cpu number as the only argument.
>>>>>
>>>>> Obviously we can't inject an arbitrary MCE event with that monitor
>>>>> command.  But isn't there some sort of catch-all / unknown type of MCE
>>>>> event which we could inject?
>>>>
>>>> We have "unknown" type of error, but we should also pass an address in
>>>> the MCE event log. Strictly speaking this address should be a valid
>>>> address in the current CPU context as MCEs are synchronous errors
>>>> triggered when we touch a bad address.
>>>
>>> Well, some of them are.  At least historically both synchronous and
>>> asynchronous MCEs were possible.  Are there really no versions where
>>> you can report an MCE with unknown address?
>>
>> I am not aware of any such versions. Will cross check.
>>
>>>
>>>> We can pass a default address with every nmi, but I am not sure whether
>>>> that will be practically helpful.
>>>>
>>>>> It seems very confusing to me to have 2 totally separate "nmi"
>>>>> mechanisms.
>>>>>
>>>>>> So I think TCG support should be a separate patch by itself.
>>>>>
>>>>> Even if we don't wire up the monitor command, I still don't see
>>>>> anything that this patch breaks - we can support the nmi-register and
>>>>> nmi-interlock calls without ever actually creating MCE events.
>>>>
>>>> If we support nmi-register and nmi-interlock calls without the monitor
>>>> command wire-up then we will be falsely claiming the nmi support to the
>>>> guest while it is not actually supported.
>>>
>>> How so?  AFAICT, from the point of view of the guest this is not
>>> observably different from supporting the NMI mechanism but NMIs never
>>> occurring.
>>
>> A guest inserting a duplicate SLB will expect the machine check
>> exception delivered to the handler registered via nmi,register.
>> But we actually don't do that in TCG.
> 
> Ah, true, I was thinking of external hardware fault triggered MCEs
> rather than software error ones like duplicate SLB.
> 
> That said, I strongly suspect TCG is buggy enough at present that
> exact behaviour in rare error conditions like duplicate SLB is not
> really a big problem in the scheme of things.
> 
> I really don't think we can enable this by default until we allow it
> for TCG - we don't want starting a TCG guest to involve manually
> switching other options.
> 
> We could consider allowing it for TCG but just printing a warning that
> the behaviour may not be correct in some conditions - we do something
> similar for some of the Spectre workarounds already.

I think we had better not enable this by default until we enhance TCG to
support FWNMI.

> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [Qemu-ppc] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by Greg Kurz 6 years, 7 months ago

On Fri, 5 Jul 2019 16:49:17 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> 
> 
> On Friday 05 July 2019 06:37 AM, David Gibson wrote:
> > On Thu, Jul 04, 2019 at 10:33:11AM +0530, Aravinda Prasad wrote:
> >>
> >>
> >> On Thursday 04 July 2019 06:37 AM, David Gibson wrote:
> >>> On Wed, Jul 03, 2019 at 02:58:24PM +0530, Aravinda Prasad wrote:
> >>>>
> >>>>
> >>>> On Wednesday 03 July 2019 08:33 AM, David Gibson wrote:
> >>>>> On Tue, Jul 02, 2019 at 11:54:26AM +0530, Aravinda Prasad wrote:
> >>>>>>
> >>>>>>
> >>>>>> On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
> >>>>>>> On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
> >>>>>>>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
> >>>>>>>> the KVM causes guest exit with NMI as exit reason
> >>>>>>>> when it encounters a machine check exception on the
> >>>>>>>> address belonging to a guest. Without this capability
> >>>>>>>> enabled, KVM redirects machine check exceptions to
> >>>>>>>> guest's 0x200 vector.
> >>>>>>>>
> >>>>>>>> This patch also introduces fwnmi-mce capability to
> >>>>>>>> deal with the case when a guest with the
> >>>>>>>> KVM_CAP_PPC_FWNMI capability enabled is attempted
> >>>>>>>> to migrate to a host that does not support this
> >>>>>>>> capability.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>>>>>> ---
> >>>>>>>>  hw/ppc/spapr.c         |    1 +
> >>>>>>>>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
> >>>>>>>>  include/hw/ppc/spapr.h |    4 +++-
> >>>>>>>>  target/ppc/kvm.c       |   19 +++++++++++++++++++
> >>>>>>>>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
> >>>>>>>>  5 files changed, 61 insertions(+), 1 deletion(-)
> >>>>>>>>
> >>>>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>>>>>> index 6dd8aaa..2ef86aa 100644
> >>>>>>>> --- a/hw/ppc/spapr.c
> >>>>>>>> +++ b/hw/ppc/spapr.c
> >>>>>>>> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >>>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>>>>>      spapr_caps_add_properties(smc, &error_abort);
> >>>>>>>>      smc->irq = &spapr_irq_dual;
> >>>>>>>>      smc->dr_phb_enabled = true;
> >>>>>>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
> >>>>>>>> index 31b4661..2e92eb6 100644
> >>>>>>>> --- a/hw/ppc/spapr_caps.c
> >>>>>>>> +++ b/hw/ppc/spapr_caps.c
> >>>>>>>> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
> >>>>>>>>      }
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
> >>>>>>>> +                                Error **errp)
> >>>>>>>> +{
> >>>>>>>> +    if (!val) {
> >>>>>>>> +        return; /* Disabled by default */
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>> +    if (tcg_enabled()) {
> >>>>>>>> +        error_setg(errp,
> >>>>>>>> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
> >>>>>>>
> >>>>>>> Not allowing this for TCG creates an awkward incompatibility between
> >>>>>>> KVM and TCG guests.  I can't actually see any reason to ban it for TCG
> >>>>>>> - with the current code TCG won't ever generate NMIs, but I don't see
> >>>>>>> that anything will actually break.
> >>>>>>>
> >>>>>>> In fact, we do have an nmi monitor command, currently wired to the
> >>>>>>> spapr_nmi() function which resets each cpu, but it probably makes
> >>>>>>> sense to wire it up to the fwnmi stuff when present.
> >>>>>>
> >>>>>> Yes, but that nmi support is not enough to inject a synchronous error
> >>>>>> into the guest kernel. For example, we should provide the faulty address
> >>>>>> along with other information such as the type of error (slb multi-hit,
> >>>>>> memory error, TLB multi-hit) and when the error occurred (load/store)
> >>>>>> and whether the error was completely recovered or not. Without such
> >>>>>> information we cannot build the error log and pass it on to the guest
> >>>>>> kernel. Right now nmi monitor command takes cpu number as the only argument.
> >>>>>
> >>>>> Obviously we can't inject an arbitrary MCE event with that monitor
> >>>>> command.  But isn't there some sort of catch-all / unknown type of MCE
> >>>>> event which we could inject?
> >>>>
> >>>> We have "unknown" type of error, but we should also pass an address in
> >>>> the MCE event log. Strictly speaking this address should be a valid
> >>>> address in the current CPU context as MCEs are synchronous errors
> >>>> triggered when we touch a bad address.
> >>>
> >>> Well, some of them are.  At least historically both synchronous and
> >>> asynchronous MCEs were possible.  Are there really no versions where
> >>> you can report an MCE with unknown address?
> >>
> >> I am not aware of any such versions. Will cross check.
> >>
> >>>
> >>>> We can pass a default address with every nmi, but I am not sure whether
> >>>> that will be practically helpful.
> >>>>
> >>>>> It seems very confusing to me to have 2 totally separate "nmi"
> >>>>> mechanisms.
> >>>>>
> >>>>>> So I think TCG support should be a separate patch by itself.
> >>>>>
> >>>>> Even if we don't wire up the monitor command, I still don't see
> >>>>> anything that this patch breaks - we can support the nmi-register and
> >>>>> nmi-interlock calls without ever actually creating MCE events.
> >>>>
> >>>> If we support nmi-register and nmi-interlock calls without the monitor
> >>>> command wire-up then we will be falsely claiming the nmi support to the
> >>>> guest while it is not actually supported.
> >>>
> >>> How so?  AFAICT, from the point of view of the guest this is not
> >>> observably different from supporting the NMI mechanism but NMIs never
> >>> occurring.
> >>
> >> A guest inserting a duplicate SLB will expect the machine check
> >> exception delivered to the handler registered via nmi,register.
> >> But we actually don't do that in TCG.
> > 
> > Ah, true, I was thinking of external hardware fault triggered MCEs
> > rather than software error ones like duplicate SLB.
> > 
> > That said, I strongly suspect TCG is buggy enough at present that
> > exact behaviour in rare error conditions like duplicate SLB is not
> > really a big problem in the scheme of things.
> > 
> > I really don't think we can enable this by default until we allow it
> > for TCG - we don't want starting a TCG guest to involve manually
> > switching other options.
> > 
> > We could consider allowing it for TCG but just printing a warning that
> > the behaviour may not be correct in some conditions - we do something
> > similar for some of the Spectre workarounds already.
> 
> I think we better not enable this by default until we enhance TCG to
> support fwnmi.
> 

If we ever enhance TCG... until this gets done, I concur with David's
idea of just printing a warning. System emulation+TCG is more of a CI
or developer thing: we just want FWNMI not to break anything, even
if it doesn't work. KVM is the real-life scenario we want to support.
If the feature is valuable, and I think it is, it should be the
default; otherwise fewer people will have a chance to benefit
from it.

> > 
>

Re: [Qemu-devel] [Qemu-ppc] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability

Posted by Aravinda Prasad 6 years, 7 months ago


On Friday 05 July 2019 06:53 PM, Greg Kurz wrote:
> On Fri, 5 Jul 2019 16:49:17 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>>
>>
>> On Friday 05 July 2019 06:37 AM, David Gibson wrote:
>>> On Thu, Jul 04, 2019 at 10:33:11AM +0530, Aravinda Prasad wrote:
>>>>
>>>>
>>>> On Thursday 04 July 2019 06:37 AM, David Gibson wrote:
>>>>> On Wed, Jul 03, 2019 at 02:58:24PM +0530, Aravinda Prasad wrote:
>>>>>>
>>>>>>
>>>>>> On Wednesday 03 July 2019 08:33 AM, David Gibson wrote:
>>>>>>> On Tue, Jul 02, 2019 at 11:54:26AM +0530, Aravinda Prasad wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>> On Tuesday 02 July 2019 09:21 AM, David Gibson wrote:
>>>>>>>>> On Wed, Jun 12, 2019 at 02:51:04PM +0530, Aravinda Prasad wrote:
>>>>>>>>>> Introduce the KVM capability KVM_CAP_PPC_FWNMI so that
>>>>>>>>>> the KVM causes guest exit with NMI as exit reason
>>>>>>>>>> when it encounters a machine check exception on the
>>>>>>>>>> address belonging to a guest. Without this capability
>>>>>>>>>> enabled, KVM redirects machine check exceptions to
>>>>>>>>>> guest's 0x200 vector.
>>>>>>>>>>
>>>>>>>>>> This patch also introduces fwnmi-mce capability to
>>>>>>>>>> deal with the case when a guest with the
>>>>>>>>>> KVM_CAP_PPC_FWNMI capability enabled is attempted
>>>>>>>>>> to migrate to a host that does not support this
>>>>>>>>>> capability.
>>>>>>>>>>
>>>>>>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>>>>>>>> ---
>>>>>>>>>>  hw/ppc/spapr.c         |    1 +
>>>>>>>>>>  hw/ppc/spapr_caps.c    |   26 ++++++++++++++++++++++++++
>>>>>>>>>>  include/hw/ppc/spapr.h |    4 +++-
>>>>>>>>>>  target/ppc/kvm.c       |   19 +++++++++++++++++++
>>>>>>>>>>  target/ppc/kvm_ppc.h   |   12 ++++++++++++
>>>>>>>>>>  5 files changed, 61 insertions(+), 1 deletion(-)
>>>>>>>>>>
>>>>>>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>>>>>>>> index 6dd8aaa..2ef86aa 100644
>>>>>>>>>> --- a/hw/ppc/spapr.c
>>>>>>>>>> +++ b/hw/ppc/spapr.c
>>>>>>>>>> @@ -4360,6 +4360,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>>>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>>>>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>>>>>>>      smc->irq = &spapr_irq_dual;
>>>>>>>>>>      smc->dr_phb_enabled = true;
>>>>>>>>>> diff --git a/hw/ppc/spapr_caps.c b/hw/ppc/spapr_caps.c
>>>>>>>>>> index 31b4661..2e92eb6 100644
>>>>>>>>>> --- a/hw/ppc/spapr_caps.c
>>>>>>>>>> +++ b/hw/ppc/spapr_caps.c
>>>>>>>>>> @@ -479,6 +479,22 @@ static void cap_ccf_assist_apply(SpaprMachineState *spapr, uint8_t val,
>>>>>>>>>>      }
>>>>>>>>>>  }
>>>>>>>>>>  
>>>>>>>>>> +static void cap_fwnmi_mce_apply(SpaprMachineState *spapr, uint8_t val,
>>>>>>>>>> +                                Error **errp)
>>>>>>>>>> +{
>>>>>>>>>> +    if (!val) {
>>>>>>>>>> +        return; /* Disabled by default */
>>>>>>>>>> +    }
>>>>>>>>>> +
>>>>>>>>>> +    if (tcg_enabled()) {
>>>>>>>>>> +        error_setg(errp,
>>>>>>>>>> +"No Firmware Assisted Non-Maskable Interrupts support in TCG, try cap-fwnmi-mce=off");
>>>>>>>>>
>>>>>>>>> Not allowing this for TCG creates an awkward incompatibility between
>>>>>>>>> KVM and TCG guests.  I can't actually see any reason to ban it for TCG
>>>>>>>>> - with the current code TCG won't ever generate NMIs, but I don't see
>>>>>>>>> that anything will actually break.
>>>>>>>>>
>>>>>>>>> In fact, we do have an nmi monitor command, currently wired to the
>>>>>>>>> spapr_nmi() function which resets each cpu, but it probably makes
>>>>>>>>> sense to wire it up to the fwnmi stuff when present.
>>>>>>>>
>>>>>>>> Yes, but that nmi support is not enough to inject a synchronous error
>>>>>>>> into the guest kernel. For example, we should provide the faulty address
>>>>>>>> along with other information such as the type of error (slb multi-hit,
>>>>>>>> memory error, TLB multi-hit) and when the error occurred (load/store)
>>>>>>>> and whether the error was completely recovered or not. Without such
>>>>>>>> information we cannot build the error log and pass it on to the guest
>>>>>>>> kernel. Right now the nmi monitor command takes the cpu number as its only argument.
>>>>>>>
>>>>>>> Obviously we can't inject an arbitrary MCE event with that monitor
>>>>>>> command.  But isn't there some sort of catch-all / unknown type of MCE
>>>>>>> event which we could inject?
>>>>>>
>>>>>> We have "unknown" type of error, but we should also pass an address in
>>>>>> the MCE event log. Strictly speaking this address should be a valid
>>>>>> address in the current CPU context as MCEs are synchronous errors
>>>>>> triggered when we touch a bad address.
>>>>>
>>>>> Well, some of them are.  At least historically both synchronous and
>>>>> asynchronous MCEs were possible.  Are there really no versions where
>>>>> you can report an MCE with unknown address?
>>>>
>>>> I am not aware of any such versions. Will cross check.
>>>>
>>>>>
>>>>>> We can pass a default address with every nmi, but I am not sure whether
>>>>>> that will be practically helpful.
>>>>>>
>>>>>>> It seems very confusing to me to have 2 totally separate "nmi"
>>>>>>> mechanisms.
>>>>>>>
>>>>>>>> So I think TCG support should be a separate patch by itself.
>>>>>>>
>>>>>>> Even if we don't wire up the monitor command, I still don't see
>>>>>>> anything that this patch breaks - we can support the nmi-register and
>>>>>>> nmi-interlock calls without ever actually creating MCE events.
>>>>>>
>>>>>> If we support nmi-register and nmi-interlock calls without the monitor
>>>>>> command wire-up then we will be falsely claiming the nmi support to the
>>>>>> guest while it is not actually supported.
>>>>>
>>>>> How so?  AFAICT, from the point of view of the guest this is not
>>>>> observably different from supporting the NMI mechanism but NMIs never
>>>>> occurring.
>>>>
>>>> A guest inserting a duplicate SLB will expect the machine check
>>>> exception delivered to the handler registered via nmi,register.
>>>> But we actually don't do that in TCG.
>>>
>>> Ah, true, I was thinking of external hardware fault triggered MCEs
>>> rather than software error ones like duplicate SLB.
>>>
>>> That said, I strongly suspect TCG is buggy enough at present that
>>> exact behaviour in rare error conditions like duplicate SLB is not
>>> really a big problem in the scheme of things.
>>>
>>> I really don't think we can enable this by default until we allow it
>>> for TCG - we don't want starting a TCG guest to involve manually
>>> switching other options.
>>>
>>> We could consider allowing it for TCG but just printing a warning that
>>> the behaviour may not be correct in some conditions - we do something
>>> similar for some of the Spectre workarounds already.
>>
>> I think we had better not enable this by default until we enhance TCG to
>> support fwnmi.
>>
> 
> If we ever enhance TCG... until this gets done, I concur with David's
> idea of just printing a warning. System emulation+TCG is more a CI
> or developer thing: we just want FWNMI not to break anything, even
> if it doesn't work. KVM is the real life scenario we want to support.
> If the feature is valuable, and I think it is, it should be the
> default; otherwise fewer people will have a chance to benefit
> from it.

ok.

> 
>>>
>>
> 

-- 
Regards,
Aravinda

[Qemu-devel] [PATCH v10 1/6] Wrapper function to wait on condition for the main loop mutex
[Qemu-devel] [PATCH v10 2/6] ppc: spapr: Introduce FWNMI capability
[Qemu-devel] [PATCH v10 3/6] target/ppc: Handle NMI guest exit
[Qemu-devel] [PATCH v10 4/6] target/ppc: Build rtas error log upon an MCE
[Qemu-devel] [PATCH v10 5/6] migration: Include migration support for machine check handling
[Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls