[Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls

Aravinda Prasad posted 6 patches 6 years, 8 months ago
Maintainers: David Gibson <david@gibson.dropbear.id.au>, Richard Henderson <rth@twiddle.net>, Paolo Bonzini <pbonzini@redhat.com>
There is a newer version of this series
[Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Aravinda Prasad 6 years, 8 months ago
This patch adds support in QEMU to handle "ibm,nmi-register"
and "ibm,nmi-interlock" RTAS calls and sets the default
value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
type 4.0.

The machine check notification address is saved when the
OS issues "ibm,nmi-register" RTAS call.

This patch also handles the case when multiple processors
experience machine check at or about the same time by
handling "ibm,nmi-interlock" call. In such cases, as per
PAPR, subsequent processors serialize waiting for the first
processor to issue the "ibm,nmi-interlock" call. The second
processor that also received a machine check error waits
till the first processor is done reading the error log.
The first processor issues "ibm,nmi-interlock" call
when the error log is consumed.

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
---
 hw/ppc/spapr.c         |    6 ++++-
 hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/hw/ppc/spapr.h |    5 +++-
 3 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index 3d6d139..213d493 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
         /* Create the error string for live migration blocker */
         error_setg(&spapr->fwnmi_migration_blocker,
                 "Live migration not supported during machine check handling");
+
+        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
+        spapr_fwnmi_register();
     }
 
     spapr->rtas_blob = g_malloc(spapr->rtas_size);
@@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
     smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
     smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
-    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
     spapr_caps_add_properties(smc, &error_abort);
     smc->irq = &spapr_irq_dual;
     smc->dr_phb_enabled = true;
@@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
     smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
     smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
     smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
+    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
 }
 
 DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
index a015a80..e010cb2 100644
--- a/hw/ppc/spapr_rtas.c
+++ b/hw/ppc/spapr_rtas.c
@@ -49,6 +49,7 @@
 #include "hw/ppc/fdt.h"
 #include "target/ppc/mmu-hash64.h"
 #include "target/ppc/mmu-book3s-v3.h"
+#include "migration/blocker.h"
 
 static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
                                    uint32_t token, uint32_t nargs,
@@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
     rtas_st(rets, 1, 100);
 }
 
+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+                                  SpaprMachineState *spapr,
+                                  uint32_t token, uint32_t nargs,
+                                  target_ulong args,
+                                  uint32_t nret, target_ulong rets)
+{
+    int ret;
+    hwaddr rtas_addr = spapr_get_rtas_addr();
+
+    if (!rtas_addr) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    }
+
+    ret = kvmppc_fwnmi_enable(cpu);
+    if (ret == 1) {
+        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
+        return;
+    } else if (ret < 0) {
+        error_report("Couldn't enable KVM FWNMI capability");
+        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
+        return;
+    }
+
+    spapr->guest_machine_check_addr = rtas_ld(args, 1);
+    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+}
+
+static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
+                                   SpaprMachineState *spapr,
+                                   uint32_t token, uint32_t nargs,
+                                   target_ulong args,
+                                   uint32_t nret, target_ulong rets)
+{
+    if (spapr->guest_machine_check_addr == -1) {
+        /* NMI register not called */
+        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
+    } else {
+        /*
+         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
+         * hence unset mc_status.
+         */
+        spapr->mc_status = -1;
+        qemu_cond_signal(&spapr->mc_delivery_cond);
+        migrate_del_blocker(spapr->fwnmi_migration_blocker);
+        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
+    }
+}
+
 static struct rtas_call {
     const char *name;
     spapr_rtas_fn fn;
@@ -496,6 +551,14 @@ hwaddr spapr_get_rtas_addr(void)
     return (hwaddr)fdt32_to_cpu(*rtas_data);
 }
 
+void spapr_fwnmi_register(void)
+{
+    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
+                        rtas_ibm_nmi_register);
+    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
+                        rtas_ibm_nmi_interlock);
+}
+
 static void core_rtas_register_types(void)
 {
     spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 0dedf0a..7ae53e2 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -637,8 +637,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
 #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
 #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
 #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
+#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
 
-#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
+#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
 
 /* RTAS ibm,get-system-parameter token values */
 #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
@@ -894,4 +896,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
 
 void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
 hwaddr spapr_get_rtas_addr(void);
+void spapr_fwnmi_register(void);
 #endif /* HW_SPAPR_H */


Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by David Gibson 6 years, 7 months ago
On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
> This patch adds support in QEMU to handle "ibm,nmi-register"
> and "ibm,nmi-interlock" RTAS calls and sets the default
> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> type 4.0.
> 
> The machine check notification address is saved when the
> OS issues "ibm,nmi-register" RTAS call.
> 
> This patch also handles the case when multiple processors
> experience machine check at or about the same time by
> handling "ibm,nmi-interlock" call. In such cases, as per
> PAPR, subsequent processors serialize waiting for the first
> processor to issue the "ibm,nmi-interlock" call. The second
> processor that also received a machine check error waits
> till the first processor is done reading the error log.
> The first processor issues "ibm,nmi-interlock" call
> when the error log is consumed.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         |    6 ++++-
>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    5 +++-
>  3 files changed, 72 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 3d6d139..213d493 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
>          /* Create the error string for live migration blocker */
>          error_setg(&spapr->fwnmi_migration_blocker,
>                  "Live migration not supported during machine check handling");
> +
> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> +        spapr_fwnmi_register();
>      }
>  
>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;

Turning this on by default really isn't ok if it stops you running TCG
guests at all.

>      spapr_caps_add_properties(smc, &error_abort);
>      smc->irq = &spapr_irq_dual;
>      smc->dr_phb_enabled = true;
> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;

We're now well past 4.0, and in fact we're about to go into soft
freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
will need to retain the old default.

>  }
>  
>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index a015a80..e010cb2 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -49,6 +49,7 @@
>  #include "hw/ppc/fdt.h"
>  #include "target/ppc/mmu-hash64.h"
>  #include "target/ppc/mmu-book3s-v3.h"
> +#include "migration/blocker.h"
>  
>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>                                     uint32_t token, uint32_t nargs,
> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      rtas_st(rets, 1, 100);
>  }
>  
> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> +                                  SpaprMachineState *spapr,
> +                                  uint32_t token, uint32_t nargs,
> +                                  target_ulong args,
> +                                  uint32_t nret, target_ulong rets)
> +{
> +    int ret;
> +    hwaddr rtas_addr = spapr_get_rtas_addr();
> +
> +    if (!rtas_addr) {
> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +        return;
> +    }
> +
> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +        return;
> +    }
> +
> +    ret = kvmppc_fwnmi_enable(cpu);
> +    if (ret == 1) {
> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);

I don't understand this case separate from the others.  We've already
set the cap, so fwnmi support should be checked and available.

> +        return;
> +    } else if (ret < 0) {
> +        error_report("Couldn't enable KVM FWNMI capability");
> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> +        return;
> +    }
> +
> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +}
> +
> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> +                                   SpaprMachineState *spapr,
> +                                   uint32_t token, uint32_t nargs,
> +                                   target_ulong args,
> +                                   uint32_t nret, target_ulong rets)
> +{
> +    if (spapr->guest_machine_check_addr == -1) {
> +        /* NMI register not called */
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +    } else {
> +        /*
> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
> +         * hence unset mc_status.
> +         */
> +        spapr->mc_status = -1;
> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);

Hrm.  We add the blocker at the mce request point.  First, that's in
another patch, which isn't great.  Second, does that mean we could add
multiple times if we get an MCE on multiple CPUs?  Will that work and
correctly match adds and removes properly?

> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +    }
> +}
> +
>  static struct rtas_call {
>      const char *name;
>      spapr_rtas_fn fn;
> @@ -496,6 +551,14 @@ hwaddr spapr_get_rtas_addr(void)
>      return (hwaddr)fdt32_to_cpu(*rtas_data);
>  }
>  
> +void spapr_fwnmi_register(void)
> +{
> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
> +                        rtas_ibm_nmi_register);
> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
> +                        rtas_ibm_nmi_interlock);
> +}
> +
>  static void core_rtas_register_types(void)
>  {
>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 0dedf0a..7ae53e2 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -637,8 +637,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>  
> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>  
>  /* RTAS ibm,get-system-parameter token values */
>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
> @@ -894,4 +896,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>  
>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>  hwaddr spapr_get_rtas_addr(void);
> +void spapr_fwnmi_register(void);
>  #endif /* HW_SPAPR_H */
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Aravinda Prasad 6 years, 7 months ago

On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
> On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
>> This patch adds support in QEMU to handle "ibm,nmi-register"
>> and "ibm,nmi-interlock" RTAS calls and sets the default
>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
>> type 4.0.
>>
>> The machine check notification address is saved when the
>> OS issues "ibm,nmi-register" RTAS call.
>>
>> This patch also handles the case when multiple processors
>> experience machine check at or about the same time by
>> handling "ibm,nmi-interlock" call. In such cases, as per
>> PAPR, subsequent processors serialize waiting for the first
>> processor to issue the "ibm,nmi-interlock" call. The second
>> processor that also received a machine check error waits
>> till the first processor is done reading the error log.
>> The first processor issues "ibm,nmi-interlock" call
>> when the error log is consumed.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr.c         |    6 ++++-
>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    5 +++-
>>  3 files changed, 72 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 3d6d139..213d493 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
>>          /* Create the error string for live migration blocker */
>>          error_setg(&spapr->fwnmi_migration_blocker,
>>                  "Live migration not supported during machine check handling");
>> +
>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>> +        spapr_fwnmi_register();
>>      }
>>  
>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> 
> Turning this on by default really isn't ok if it stops you running TCG
> guests at all.

If so this can be "off" by default until TCG is supported.

> 
>>      spapr_caps_add_properties(smc, &error_abort);
>>      smc->irq = &spapr_irq_dual;
>>      smc->dr_phb_enabled = true;
>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> 
> We're now well past 4.0, and in fact we're about to go into soft
> freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
> will need to retain the old default.

ok.

> 
>>  }
>>  
>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index a015a80..e010cb2 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -49,6 +49,7 @@
>>  #include "hw/ppc/fdt.h"
>>  #include "target/ppc/mmu-hash64.h"
>>  #include "target/ppc/mmu-book3s-v3.h"
>> +#include "migration/blocker.h"
>>  
>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>                                     uint32_t token, uint32_t nargs,
>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>      rtas_st(rets, 1, 100);
>>  }
>>  
>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>> +                                  SpaprMachineState *spapr,
>> +                                  uint32_t token, uint32_t nargs,
>> +                                  target_ulong args,
>> +                                  uint32_t nret, target_ulong rets)
>> +{
>> +    int ret;
>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
>> +
>> +    if (!rtas_addr) {
>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> +        return;
>> +    }
>> +
>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> +        return;
>> +    }
>> +
>> +    ret = kvmppc_fwnmi_enable(cpu);
>> +    if (ret == 1) {
>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> 
> I don't understand this case separate from the others.  We've already
> set the cap, so fwnmi support should be checked and available.

But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
cap_ppc_fwnmi is not available in KVM.

> 
>> +        return;
>> +    } else if (ret < 0) {
>> +        error_report("Couldn't enable KVM FWNMI capability");
>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>> +        return;
>> +    }
>> +
>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +}
>> +
>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>> +                                   SpaprMachineState *spapr,
>> +                                   uint32_t token, uint32_t nargs,
>> +                                   target_ulong args,
>> +                                   uint32_t nret, target_ulong rets)
>> +{
>> +    if (spapr->guest_machine_check_addr == -1) {
>> +        /* NMI register not called */
>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>> +    } else {
>> +        /*
>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
>> +         * hence unset mc_status.
>> +         */
>> +        spapr->mc_status = -1;
>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> 
> Hrm.  We add the blocker at the mce request point.  First, that's in
> another patch, which isn't great.  Second, does that mean we could add
> multiple times if we get an MCE on multiple CPUs?  Will that work and
> correctly match adds and removes properly?

If it is fine to move the migration patch as the last patch in the
sequence, then we will have add and del blocker in the same patch.

And yes we could add multiple times if we get MCE on multiple CPUs and
as all those cpus call interlock there should be matching number of
delete blockers.

Regards,
Aravinda

> 
>> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +    }
>> +}
>> +
>>  static struct rtas_call {
>>      const char *name;
>>      spapr_rtas_fn fn;
>> @@ -496,6 +551,14 @@ hwaddr spapr_get_rtas_addr(void)
>>      return (hwaddr)fdt32_to_cpu(*rtas_data);
>>  }
>>  
>> +void spapr_fwnmi_register(void)
>> +{
>> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
>> +                        rtas_ibm_nmi_register);
>> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
>> +                        rtas_ibm_nmi_interlock);
>> +}
>> +
>>  static void core_rtas_register_types(void)
>>  {
>>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 0dedf0a..7ae53e2 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -637,8 +637,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
>> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>>  
>> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>>  
>>  /* RTAS ibm,get-system-parameter token values */
>>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
>> @@ -894,4 +896,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>>  
>>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>>  hwaddr spapr_get_rtas_addr(void);
>> +void spapr_fwnmi_register(void);
>>  #endif /* HW_SPAPR_H */
>>
> 

-- 
Regards,
Aravinda


Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by David Gibson 6 years, 7 months ago
On Tue, Jul 02, 2019 at 04:10:08PM +0530, Aravinda Prasad wrote:
> 
> 
> On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
> > On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
> >> This patch adds support in QEMU to handle "ibm,nmi-register"
> >> and "ibm,nmi-interlock" RTAS calls and sets the default
> >> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> >> type 4.0.
> >>
> >> The machine check notification address is saved when the
> >> OS issues "ibm,nmi-register" RTAS call.
> >>
> >> This patch also handles the case when multiple processors
> >> experience machine check at or about the same time by
> >> handling "ibm,nmi-interlock" call. In such cases, as per
> >> PAPR, subsequent processors serialize waiting for the first
> >> processor to issue the "ibm,nmi-interlock" call. The second
> >> processor that also received a machine check error waits
> >> till the first processor is done reading the error log.
> >> The first processor issues "ibm,nmi-interlock" call
> >> when the error log is consumed.
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr.c         |    6 ++++-
> >>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    5 +++-
> >>  3 files changed, 72 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 3d6d139..213d493 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
> >>          /* Create the error string for live migration blocker */
> >>          error_setg(&spapr->fwnmi_migration_blocker,
> >>                  "Live migration not supported during machine check handling");
> >> +
> >> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> >> +        spapr_fwnmi_register();
> >>      }
> >>  
> >>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> > 
> > Turning this on by default really isn't ok if it stops you running TCG
> > guests at all.
> 
> If so this can be "off" by default until TCG is supported.
> 
> > 
> >>      spapr_caps_add_properties(smc, &error_abort);
> >>      smc->irq = &spapr_irq_dual;
> >>      smc->dr_phb_enabled = true;
> >> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
> >>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
> >>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> >>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> >> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> > 
> > We're now well past 4.0, and in fact we're about to go into soft
> > freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
> > will need to retain the old default.
> 
> ok.
> 
> > 
> >>  }
> >>  
> >>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
> >> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >> index a015a80..e010cb2 100644
> >> --- a/hw/ppc/spapr_rtas.c
> >> +++ b/hw/ppc/spapr_rtas.c
> >> @@ -49,6 +49,7 @@
> >>  #include "hw/ppc/fdt.h"
> >>  #include "target/ppc/mmu-hash64.h"
> >>  #include "target/ppc/mmu-book3s-v3.h"
> >> +#include "migration/blocker.h"
> >>  
> >>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>                                     uint32_t token, uint32_t nargs,
> >> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>      rtas_st(rets, 1, 100);
> >>  }
> >>  
> >> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> >> +                                  SpaprMachineState *spapr,
> >> +                                  uint32_t token, uint32_t nargs,
> >> +                                  target_ulong args,
> >> +                                  uint32_t nret, target_ulong rets)
> >> +{
> >> +    int ret;
> >> +    hwaddr rtas_addr = spapr_get_rtas_addr();
> >> +
> >> +    if (!rtas_addr) {
> >> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >> +        return;
> >> +    }
> >> +
> >> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
> >> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >> +        return;
> >> +    }
> >> +
> >> +    ret = kvmppc_fwnmi_enable(cpu);
> >> +    if (ret == 1) {
> >> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> > 
> > I don't understand this case separate from the others.  We've already
> > set the cap, so fwnmi support should be checked and available.
> 
> But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
> cap_ppc_fwnmi is not available in KVM.

But you've checked for the presence of the extension, yes?  So a
failure to enable the cap would be unexpected.  In which case how does
this case differ from.. 

> 
> > 
> >> +        return;
> >> +    } else if (ret < 0) {
> >> +        error_report("Couldn't enable KVM FWNMI capability");
> >> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> >> +        return;

..this case.

> >> +    }
> >> +
> >> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> >> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >> +}
> >> +
> >> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> >> +                                   SpaprMachineState *spapr,
> >> +                                   uint32_t token, uint32_t nargs,
> >> +                                   target_ulong args,
> >> +                                   uint32_t nret, target_ulong rets)
> >> +{
> >> +    if (spapr->guest_machine_check_addr == -1) {
> >> +        /* NMI register not called */
> >> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> >> +    } else {
> >> +        /*
> >> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
> >> +         * hence unset mc_status.
> >> +         */
> >> +        spapr->mc_status = -1;
> >> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> >> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> > 
> > Hrm.  We add the blocker at the mce request point.  First, that's in
> > another patch, which isn't great.  Second, does that mean we could add
> > multiple times if we get an MCE on multiple CPUs?  Will that work and
> > correctly match adds and removes properly?
> 
> If it is fine to move the migration patch as the last patch in the
> sequence, then we will have add and del blocker in the same patch.
> 
> And yes we could add multiple times if we get MCE on multiple CPUs and
> as all those cpus call interlock there should be matching number of
> delete blockers.

Ok, and I think adding the same pointer to the list multiple times
will work ok.

Btw, add_blocker() can fail - have you handled failure conditions?

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Aravinda Prasad 6 years, 7 months ago

On Wednesday 03 July 2019 08:50 AM, David Gibson wrote:
> On Tue, Jul 02, 2019 at 04:10:08PM +0530, Aravinda Prasad wrote:
>>
>>
>> On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
>>> On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
>>>> This patch adds support in QEMU to handle "ibm,nmi-register"
>>>> and "ibm,nmi-interlock" RTAS calls and sets the default
>>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
>>>> type 4.0.
>>>>
>>>> The machine check notification address is saved when the
>>>> OS issues "ibm,nmi-register" RTAS call.
>>>>
>>>> This patch also handles the case when multiple processors
>>>> experience machine check at or about the same time by
>>>> handling "ibm,nmi-interlock" call. In such cases, as per
>>>> PAPR, subsequent processors serialize waiting for the first
>>>> processor to issue the "ibm,nmi-interlock" call. The second
>>>> processor that also received a machine check error waits
>>>> till the first processor is done reading the error log.
>>>> The first processor issues "ibm,nmi-interlock" call
>>>> when the error log is consumed.
>>>>
>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>> ---
>>>>  hw/ppc/spapr.c         |    6 ++++-
>>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  include/hw/ppc/spapr.h |    5 +++-
>>>>  3 files changed, 72 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index 3d6d139..213d493 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
>>>>          /* Create the error string for live migration blocker */
>>>>          error_setg(&spapr->fwnmi_migration_blocker,
>>>>                  "Live migration not supported during machine check handling");
>>>> +
>>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>>>> +        spapr_fwnmi_register();
>>>>      }
>>>>  
>>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
>>>
>>> Turning this on by default really isn't ok if it stops you running TCG
>>> guests at all.
>>
>> If so this can be "off" by default until TCG is supported.
>>
>>>
>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>      smc->irq = &spapr_irq_dual;
>>>>      smc->dr_phb_enabled = true;
>>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
>>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
>>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>
>>> We're now well past 4.0, and in fact we're about to go into soft
>>> freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
>>> will need to retain the old default.
>>
>> ok.
>>
>>>
>>>>  }
>>>>  
>>>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
>>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>>>> index a015a80..e010cb2 100644
>>>> --- a/hw/ppc/spapr_rtas.c
>>>> +++ b/hw/ppc/spapr_rtas.c
>>>> @@ -49,6 +49,7 @@
>>>>  #include "hw/ppc/fdt.h"
>>>>  #include "target/ppc/mmu-hash64.h"
>>>>  #include "target/ppc/mmu-book3s-v3.h"
>>>> +#include "migration/blocker.h"
>>>>  
>>>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>>                                     uint32_t token, uint32_t nargs,
>>>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>>      rtas_st(rets, 1, 100);
>>>>  }
>>>>  
>>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>>>> +                                  SpaprMachineState *spapr,
>>>> +                                  uint32_t token, uint32_t nargs,
>>>> +                                  target_ulong args,
>>>> +                                  uint32_t nret, target_ulong rets)
>>>> +{
>>>> +    int ret;
>>>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
>>>> +
>>>> +    if (!rtas_addr) {
>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    ret = kvmppc_fwnmi_enable(cpu);
>>>> +    if (ret == 1) {
>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>
>>> I don't understand this case separate from the others.  We've already
>>> set the cap, so fwnmi support should be checked and available.
>>
>> But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
>> cap_ppc_fwnmi is not available in KVM.
> 
> But you've checked for the presence of the extension, yes?  So a
> failure to enable the cap would be unexpected.  In which case how does
> this case differ from.. 

No, this is the function where I check for the presence of the
extension. In kvm_arch_init() we just set cap_ppc_fwnmi to 1 if KVM
support is available, but don't take any action if unavailable.

So this case is when we are running an old version of KVM with no
cap_ppc_fwnmi support.

> 
>>
>>>
>>>> +        return;
>>>> +    } else if (ret < 0) {
>>>> +        error_report("Couldn't enable KVM FWNMI capability");
>>>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>>>> +        return;
> 
> ..this case.

And this is when we have the KVM support but due to some problem with
either KVM or QEMU we are unable to enable cap_ppc_fwnmi.

> 
>>>> +    }
>>>> +
>>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>>> +}
>>>> +
>>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>>>> +                                   SpaprMachineState *spapr,
>>>> +                                   uint32_t token, uint32_t nargs,
>>>> +                                   target_ulong args,
>>>> +                                   uint32_t nret, target_ulong rets)
>>>> +{
>>>> +    if (spapr->guest_machine_check_addr == -1) {
>>>> +        /* NMI register not called */
>>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>>>> +    } else {
>>>> +        /*
>>>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
>>>> +         * hence unset mc_status.
>>>> +         */
>>>> +        spapr->mc_status = -1;
>>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>>>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
>>>
>>> Hrm.  We add the blocker at the mce request point.  First, that's in
>>> another patch, which isn't great.  Second, does that mean we could add
>>> multiple times if we get an MCE on multiple CPUs?  Will that work and
>>> correctly match adds and removes properly?
>>
>> If it is fine to move the migration patch as the last patch in the
>> sequence, then we will have add and del blocker in the same patch.
>>
>> And yes we could add multiple times if we get MCE on multiple CPUs and
>> as all those cpus call interlock there should be matching number of
>> delete blockers.
> 
> Ok, and I think adding the same pointer to the list multiple times
> will work ok.

I think so

> 
> Btw, add_blocker() can fail - have you handled failure conditions?

yes, I am handling it.

> 

-- 
Regards,
Aravinda


Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by David Gibson 6 years, 7 months ago
On Wed, Jul 03, 2019 at 02:30:31PM +0530, Aravinda Prasad wrote:
> 
> 
> On Wednesday 03 July 2019 08:50 AM, David Gibson wrote:
> > On Tue, Jul 02, 2019 at 04:10:08PM +0530, Aravinda Prasad wrote:
> >>
> >>
> >> On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
> >>> On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
> >>>> This patch adds support in QEMU to handle "ibm,nmi-register"
> >>>> and "ibm,nmi-interlock" RTAS calls and sets the default
> >>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> >>>> type 4.0.
> >>>>
> >>>> The machine check notification address is saved when the
> >>>> OS issues "ibm,nmi-register" RTAS call.
> >>>>
> >>>> This patch also handles the case when multiple processors
> >>>> experience machine check at or about the same time by
> >>>> handling "ibm,nmi-interlock" call. In such cases, as per
> >>>> PAPR, subsequent processors serialize waiting for the first
> >>>> processor to issue the "ibm,nmi-interlock" call. The second
> >>>> processor that also received a machine check error waits
> >>>> till the first processor is done reading the error log.
> >>>> The first processor issues "ibm,nmi-interlock" call
> >>>> when the error log is consumed.
> >>>>
> >>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>> ---
> >>>>  hw/ppc/spapr.c         |    6 ++++-
> >>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>  include/hw/ppc/spapr.h |    5 +++-
> >>>>  3 files changed, 72 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>> index 3d6d139..213d493 100644
> >>>> --- a/hw/ppc/spapr.c
> >>>> +++ b/hw/ppc/spapr.c
> >>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
> >>>>          /* Create the error string for live migration blocker */
> >>>>          error_setg(&spapr->fwnmi_migration_blocker,
> >>>>                  "Live migration not supported during machine check handling");
> >>>> +
> >>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> >>>> +        spapr_fwnmi_register();
> >>>>      }
> >>>>  
> >>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> >>>
> >>> Turning this on by default really isn't ok if it stops you running TCG
> >>> guests at all.
> >>
> >> If so this can be "off" by default until TCG is supported.
> >>
> >>>
> >>>>      spapr_caps_add_properties(smc, &error_abort);
> >>>>      smc->irq = &spapr_irq_dual;
> >>>>      smc->dr_phb_enabled = true;
> >>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
> >>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
> >>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> >>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> >>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>
> >>> We're now well past 4.0, and in fact we're about to go into soft
> >>> freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
> >>> will need to retain the old default.
> >>
> >> ok.
> >>
> >>>
> >>>>  }
> >>>>  
> >>>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
> >>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >>>> index a015a80..e010cb2 100644
> >>>> --- a/hw/ppc/spapr_rtas.c
> >>>> +++ b/hw/ppc/spapr_rtas.c
> >>>> @@ -49,6 +49,7 @@
> >>>>  #include "hw/ppc/fdt.h"
> >>>>  #include "target/ppc/mmu-hash64.h"
> >>>>  #include "target/ppc/mmu-book3s-v3.h"
> >>>> +#include "migration/blocker.h"
> >>>>  
> >>>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>>>                                     uint32_t token, uint32_t nargs,
> >>>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>>>      rtas_st(rets, 1, 100);
> >>>>  }
> >>>>  
> >>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> >>>> +                                  SpaprMachineState *spapr,
> >>>> +                                  uint32_t token, uint32_t nargs,
> >>>> +                                  target_ulong args,
> >>>> +                                  uint32_t nret, target_ulong rets)
> >>>> +{
> >>>> +    int ret;
> >>>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
> >>>> +
> >>>> +    if (!rtas_addr) {
> >>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>> +        return;
> >>>> +    }
> >>>> +
> >>>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
> >>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>> +        return;
> >>>> +    }
> >>>> +
> >>>> +    ret = kvmppc_fwnmi_enable(cpu);
> >>>> +    if (ret == 1) {
> >>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>
> >>> I don't understand this case separate from the others.  We've already
> >>> set the cap, so fwnmi support should be checked and available.
> >>
> >> But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
> >> cap_ppc_fwnmi is not available in KVM.
> > 
> > But you've checked for the presence of the extension, yes?  So a
> > failure to enable the cap would be unexpected.  In which case how does
> > this case differ from.. 
> 
> No, this is the function where I check for the presence of the
> extension. In kvm_arch_init() we just set cap_ppc_fwnmi to 1 if KVM
> support is available, but don't take any action if unavailable.

Yeah, that's not ok.  You should be checking for the presence of the
extension in the .apply() function.  If you start up with the spapr
cap selected then failing at nmi-register time means something has
gone badly wrong.

This is necessary for migration: if you start on a system with nmi
support and the guest registers for it, you can't then migrate safely
to a system that doesn't have nmi support.  The way to handle that
case is to have qemu fail to even start up on a destination without
the support.

> So this case is when we are running an old version of KVM with no
> cap_ppc_fwnmi support.
> 
> > 
> >>
> >>>
> >>>> +        return;
> >>>> +    } else if (ret < 0) {
> >>>> +        error_report("Couldn't enable KVM FWNMI capability");
> >>>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> >>>> +        return;
> > 
> > ..this case.
> 
> And this is when we have the KVM support but due to some problem with
> either KVM or QEMU we are unable to enable cap_ppc_fwnmi.
> 
> > 
> >>>> +    }
> >>>> +
> >>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> >>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >>>> +}
> >>>> +
> >>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> >>>> +                                   SpaprMachineState *spapr,
> >>>> +                                   uint32_t token, uint32_t nargs,
> >>>> +                                   target_ulong args,
> >>>> +                                   uint32_t nret, target_ulong rets)
> >>>> +{
> >>>> +    if (spapr->guest_machine_check_addr == -1) {
> >>>> +        /* NMI register not called */
> >>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> >>>> +    } else {
> >>>> +        /*
> >>>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
> >>>> +         * hence unset mc_status.
> >>>> +         */
> >>>> +        spapr->mc_status = -1;
> >>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> >>>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> >>>
> >>> Hrm.  We add the blocker at the mce request point.  First, that's in
> >>> another patch, which isn't great.  Second, does that mean we could add
> >>> multiple times if we get an MCE on multiple CPUs?  Will that work and
> >>> correctly match adds and removes properly?
> >>
> >> If it is fine to move the migration patch as the last patch in the
> >> sequence, then we will have add and del blocker in the same patch.
> >>
> >> And yes we could add multiple times if we get MCE on multiple CPUs and
> >> as all those cpus call interlock there should be matching number of
> >> delete blockers.
> > 
> > Ok, and I think adding the same pointer to the list multiple times
> > will work ok.
> 
> I think so
> 
> > 
> > Btw, add_blocker() can fail - have you handled failure conditions?
> 
> yes, I am handling it.
> 
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Aravinda Prasad 6 years, 7 months ago

On Thursday 04 July 2019 06:42 AM, David Gibson wrote:
> On Wed, Jul 03, 2019 at 02:30:31PM +0530, Aravinda Prasad wrote:
>>
>>
>> On Wednesday 03 July 2019 08:50 AM, David Gibson wrote:
>>> On Tue, Jul 02, 2019 at 04:10:08PM +0530, Aravinda Prasad wrote:
>>>>
>>>>
>>>> On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
>>>>> On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
>>>>>> This patch adds support in QEMU to handle "ibm,nmi-register"
>>>>>> and "ibm,nmi-interlock" RTAS calls and sets the default
>>>>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
>>>>>> type 4.0.
>>>>>>
>>>>>> The machine check notification address is saved when the
>>>>>> OS issues "ibm,nmi-register" RTAS call.
>>>>>>
>>>>>> This patch also handles the case when multiple processors
>>>>>> experience machine check at or about the same time by
>>>>>> handling "ibm,nmi-interlock" call. In such cases, as per
>>>>>> PAPR, subsequent processors serialize waiting for the first
>>>>>> processor to issue the "ibm,nmi-interlock" call. The second
>>>>>> processor that also received a machine check error waits
>>>>>> till the first processor is done reading the error log.
>>>>>> The first processor issues "ibm,nmi-interlock" call
>>>>>> when the error log is consumed.
>>>>>>
>>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>>>> ---
>>>>>>  hw/ppc/spapr.c         |    6 ++++-
>>>>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>  include/hw/ppc/spapr.h |    5 +++-
>>>>>>  3 files changed, 72 insertions(+), 2 deletions(-)
>>>>>>
>>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>>>> index 3d6d139..213d493 100644
>>>>>> --- a/hw/ppc/spapr.c
>>>>>> +++ b/hw/ppc/spapr.c
>>>>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
>>>>>>          /* Create the error string for live migration blocker */
>>>>>>          error_setg(&spapr->fwnmi_migration_blocker,
>>>>>>                  "Live migration not supported during machine check handling");
>>>>>> +
>>>>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>>>>>> +        spapr_fwnmi_register();
>>>>>>      }
>>>>>>  
>>>>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>>>>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
>>>>>
>>>>> Turning this on by default really isn't ok if it stops you running TCG
>>>>> guests at all.
>>>>
>>>> If so this can be "off" by default until TCG is supported.
>>>>
>>>>>
>>>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>>>      smc->irq = &spapr_irq_dual;
>>>>>>      smc->dr_phb_enabled = true;
>>>>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
>>>>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
>>>>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>>
>>>>> We're now well past 4.0, and in fact we're about to go into soft
>>>>> freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
>>>>> will need to retain the old default.
>>>>
>>>> ok.
>>>>
>>>>>
>>>>>>  }
>>>>>>  
>>>>>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
>>>>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>>>>>> index a015a80..e010cb2 100644
>>>>>> --- a/hw/ppc/spapr_rtas.c
>>>>>> +++ b/hw/ppc/spapr_rtas.c
>>>>>> @@ -49,6 +49,7 @@
>>>>>>  #include "hw/ppc/fdt.h"
>>>>>>  #include "target/ppc/mmu-hash64.h"
>>>>>>  #include "target/ppc/mmu-book3s-v3.h"
>>>>>> +#include "migration/blocker.h"
>>>>>>  
>>>>>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>>>>                                     uint32_t token, uint32_t nargs,
>>>>>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>>>>      rtas_st(rets, 1, 100);
>>>>>>  }
>>>>>>  
>>>>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>>>>>> +                                  SpaprMachineState *spapr,
>>>>>> +                                  uint32_t token, uint32_t nargs,
>>>>>> +                                  target_ulong args,
>>>>>> +                                  uint32_t nret, target_ulong rets)
>>>>>> +{
>>>>>> +    int ret;
>>>>>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
>>>>>> +
>>>>>> +    if (!rtas_addr) {
>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>>>> +        return;
>>>>>> +    }
>>>>>> +
>>>>>> +    ret = kvmppc_fwnmi_enable(cpu);
>>>>>> +    if (ret == 1) {
>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>>>
>>>>> I don't understand this case separate from the others.  We've already
>>>>> set the cap, so fwnmi support should be checked and available.
>>>>
>>>> But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
>>>> cap_ppc_fwnmi is not available in KVM.
>>>
>>> But you've checked for the presence of the extension, yes?  So a
>>> failure to enable the cap would be unexpected.  In which case how does
>>> this case differ from.. 
>>
>> No, this is the function where I check for the presence of the
>> extension. In kvm_arch_init() we just set cap_ppc_fwnmi to 1 if KVM
>> support is available, but don't take any action if unavailable.
> 
> Yeah, that's not ok.  You should be checking for the presence of the
> extension in the .apply() function.  If you start up with the spapr
> cap selected then failing at nmi-register time means something has
> gone badly wrong.

So, I should check for two things in the .apply() function: first if
cap_ppc_fwnmi is supported and second if cap_ppc_fwnmi is enabled in KVM.

In that case kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0) should be
called during spapr_machine_init().

So, we will fail to boot (when SPAPR_CAP_FWNMI_MCE=ON) if cap_ppc_fwnmi
can't be enabled irrespective of whether a guest issues nmi,register or not.

> 
> This is necessary for migration: if you start on a system with nmi
> support and the guest registers for it, you can't then migrate safely
> to a system that doesn't have nmi support.  The way to handle that
> case is to have qemu fail to even start up on a destination without
> the support.
> 
>> So this case is when we are running an old version of KVM with no
>> cap_ppc_fwnmi support.
>>
>>>
>>>>
>>>>>
>>>>>> +        return;
>>>>>> +    } else if (ret < 0) {
>>>>>> +        error_report("Couldn't enable KVM FWNMI capability");
>>>>>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>>>>>> +        return;
>>>
>>> ..this case.
>>
>> And this is when we have the KVM support but due to some problem with
>> either KVM or QEMU we are unable to enable cap_ppc_fwnmi.
>>
>>>
>>>>>> +    }
>>>>>> +
>>>>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>>>>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>>>>> +}
>>>>>> +
>>>>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>>>>>> +                                   SpaprMachineState *spapr,
>>>>>> +                                   uint32_t token, uint32_t nargs,
>>>>>> +                                   target_ulong args,
>>>>>> +                                   uint32_t nret, target_ulong rets)
>>>>>> +{
>>>>>> +    if (spapr->guest_machine_check_addr == -1) {
>>>>>> +        /* NMI register not called */
>>>>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>>>>>> +    } else {
>>>>>> +        /*
>>>>>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
>>>>>> +         * hence unset mc_status.
>>>>>> +         */
>>>>>> +        spapr->mc_status = -1;
>>>>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>>>>>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
>>>>>
>>>>> Hrm.  We add the blocker at the mce request point.  First, that's in
>>>>> another patch, which isn't great.  Second, does that mean we could add
>>>>> multiple times if we get an MCE on multiple CPUs?  Will that work and
>>>>> correctly match adds and removes properly?
>>>>
>>>> If it is fine to move the migration patch as the last patch in the
>>>> sequence, then we will have add and del blocker in the same patch.
>>>>
>>>> And yes we could add multiple times if we get MCE on multiple CPUs and
>>>> as all those cpus call interlock there should be matching number of
>>>> delete blockers.
>>>
>>> Ok, and I think adding the same pointer to the list multiple times
>>> will work ok.
>>
>> I think so
>>
>>>
>>> Btw, add_blocker() can fail - have you handled failure conditions?
>>
>> yes, I am handling it.
>>
>>>
>>
> 

-- 
Regards,
Aravinda


Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by David Gibson 6 years, 7 months ago
On Thu, Jul 04, 2019 at 10:49:05AM +0530, Aravinda Prasad wrote:
> 
> 
> On Thursday 04 July 2019 06:42 AM, David Gibson wrote:
> > On Wed, Jul 03, 2019 at 02:30:31PM +0530, Aravinda Prasad wrote:
> >>
> >>
> >> On Wednesday 03 July 2019 08:50 AM, David Gibson wrote:
> >>> On Tue, Jul 02, 2019 at 04:10:08PM +0530, Aravinda Prasad wrote:
> >>>>
> >>>>
> >>>> On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
> >>>>> On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
> >>>>>> This patch adds support in QEMU to handle "ibm,nmi-register"
> >>>>>> and "ibm,nmi-interlock" RTAS calls and sets the default
> >>>>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> >>>>>> type 4.0.
> >>>>>>
> >>>>>> The machine check notification address is saved when the
> >>>>>> OS issues "ibm,nmi-register" RTAS call.
> >>>>>>
> >>>>>> This patch also handles the case when multiple processors
> >>>>>> experience machine check at or about the same time by
> >>>>>> handling "ibm,nmi-interlock" call. In such cases, as per
> >>>>>> PAPR, subsequent processors serialize waiting for the first
> >>>>>> processor to issue the "ibm,nmi-interlock" call. The second
> >>>>>> processor that also received a machine check error waits
> >>>>>> till the first processor is done reading the error log.
> >>>>>> The first processor issues "ibm,nmi-interlock" call
> >>>>>> when the error log is consumed.
> >>>>>>
> >>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>>>> ---
> >>>>>>  hw/ppc/spapr.c         |    6 ++++-
> >>>>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>>>  include/hw/ppc/spapr.h |    5 +++-
> >>>>>>  3 files changed, 72 insertions(+), 2 deletions(-)
> >>>>>>
> >>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>>>> index 3d6d139..213d493 100644
> >>>>>> --- a/hw/ppc/spapr.c
> >>>>>> +++ b/hw/ppc/spapr.c
> >>>>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
> >>>>>>          /* Create the error string for live migration blocker */
> >>>>>>          error_setg(&spapr->fwnmi_migration_blocker,
> >>>>>>                  "Live migration not supported during machine check handling");
> >>>>>> +
> >>>>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> >>>>>> +        spapr_fwnmi_register();
> >>>>>>      }
> >>>>>>  
> >>>>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>>>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >>>>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> >>>>>
> >>>>> Turning this on by default really isn't ok if it stops you running TCG
> >>>>> guests at all.
> >>>>
> >>>> If so this can be "off" by default until TCG is supported.
> >>>>
> >>>>>
> >>>>>>      spapr_caps_add_properties(smc, &error_abort);
> >>>>>>      smc->irq = &spapr_irq_dual;
> >>>>>>      smc->dr_phb_enabled = true;
> >>>>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> >>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>>
> >>>>> We're now well past 4.0, and in fact we're about to go into soft
> >>>>> freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
> >>>>> will need to retain the old default.
> >>>>
> >>>> ok.
> >>>>
> >>>>>
> >>>>>>  }
> >>>>>>  
> >>>>>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
> >>>>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >>>>>> index a015a80..e010cb2 100644
> >>>>>> --- a/hw/ppc/spapr_rtas.c
> >>>>>> +++ b/hw/ppc/spapr_rtas.c
> >>>>>> @@ -49,6 +49,7 @@
> >>>>>>  #include "hw/ppc/fdt.h"
> >>>>>>  #include "target/ppc/mmu-hash64.h"
> >>>>>>  #include "target/ppc/mmu-book3s-v3.h"
> >>>>>> +#include "migration/blocker.h"
> >>>>>>  
> >>>>>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>>>>>                                     uint32_t token, uint32_t nargs,
> >>>>>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>>>>>      rtas_st(rets, 1, 100);
> >>>>>>  }
> >>>>>>  
> >>>>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> >>>>>> +                                  SpaprMachineState *spapr,
> >>>>>> +                                  uint32_t token, uint32_t nargs,
> >>>>>> +                                  target_ulong args,
> >>>>>> +                                  uint32_t nret, target_ulong rets)
> >>>>>> +{
> >>>>>> +    int ret;
> >>>>>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
> >>>>>> +
> >>>>>> +    if (!rtas_addr) {
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>> +        return;
> >>>>>> +    }
> >>>>>> +
> >>>>>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>> +        return;
> >>>>>> +    }
> >>>>>> +
> >>>>>> +    ret = kvmppc_fwnmi_enable(cpu);
> >>>>>> +    if (ret == 1) {
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>
> >>>>> I don't understand this case separate from the others.  We've already
> >>>>> set the cap, so fwnmi support should be checked and available.
> >>>>
> >>>> But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
> >>>> cap_ppc_fwnmi is not available in KVM.
> >>>
> >>> But you've checked for the presence of the extension, yes?  So a
> >>> failure to enable the cap would be unexpected.  In which case how does
> >>> this case differ from.. 
> >>
> >> No, this is the function where I check for the presence of the
> >> extension. In kvm_arch_init() we just set cap_ppc_fwnmi to 1 if KVM
> >> support is available, but don't take any action if unavailable.
> > 
> > Yeah, that's not ok.  You should be checking for the presence of the
> > extension in the .apply() function.  If you start up with the spapr
> > cap selected then failing at nmi-register time means something has
> > gone badly wrong.
> 
> So, I should check for two things in the .apply() function: first if
> cap_ppc_fwnmi is supported and second if cap_ppc_fwnmi is enabled in KVM.

Not exactly.  Checking that the extension is supported means you *can*
enable it in KVM, but you should not do so at .apply() time (or NMI
behaviour won't be correct until nmi-register is called IIUC).  It
does mean that when you do enable the cap, a "not supported" failure
means something is wrong with the kernel.

> In that case kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0) should be
> called during spapr_machine_init().
> 
> So, we will fail to boot (when SPAPR_CAP_FWNMI_MCE=ON) if cap_ppc_fwnmi
> can't be enabled irrespective of whether a guest issues nmi,register or not.
> 
> > 
> > This is necessary for migration: if you start on a system with nmi
> > support and the guest registers for it, you can't then migrate safely
> > to a system that doesn't have nmi support.  The way to handle that
> > case is to have qemu fail to even start up on a destination without
> > the support.
> > 
> >> So this case is when we are running an old version of KVM with no
> >> cap_ppc_fwnmi support.
> >>
> >>>
> >>>>
> >>>>>
> >>>>>> +        return;
> >>>>>> +    } else if (ret < 0) {
> >>>>>> +        error_report("Couldn't enable KVM FWNMI capability");
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> >>>>>> +        return;
> >>>
> >>> ..this case.
> >>
> >> And this is when we have the KVM support but due to some problem with
> >> either KVM or QEMU we are unable to enable cap_ppc_fwnmi.
> >>
> >>>
> >>>>>> +    }
> >>>>>> +
> >>>>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> >>>>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> >>>>>> +                                   SpaprMachineState *spapr,
> >>>>>> +                                   uint32_t token, uint32_t nargs,
> >>>>>> +                                   target_ulong args,
> >>>>>> +                                   uint32_t nret, target_ulong rets)
> >>>>>> +{
> >>>>>> +    if (spapr->guest_machine_check_addr == -1) {
> >>>>>> +        /* NMI register not called */
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> >>>>>> +    } else {
> >>>>>> +        /*
> >>>>>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
> >>>>>> +         * hence unset mc_status.
> >>>>>> +         */
> >>>>>> +        spapr->mc_status = -1;
> >>>>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> >>>>>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> >>>>>
> >>>>> Hrm.  We add the blocker at the mce request point.  First, that's in
> >>>>> another patch, which isn't great.  Second, does that mean we could add
> >>>>> multiple times if we get an MCE on multiple CPUs?  Will that work and
> >>>>> correctly match adds and removes properly?
> >>>>
> >>>> If it is fine to move the migration patch as the last patch in the
> >>>> sequence, then we will have add and del blocker in the same patch.
> >>>>
> >>>> And yes we could add multiple times if we get MCE on multiple CPUs and
> >>>> as all those cpus call interlock there should be matching number of
> >>>> delete blockers.
> >>>
> >>> Ok, and I think adding the same pointer to the list multiple times
> >>> will work ok.
> >>
> >> I think so
> >>
> >>>
> >>> Btw, add_blocker() can fail - have you handled failure conditions?
> >>
> >> yes, I am handling it.
> >>
> >>>
> >>
> > 
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson
Re: [Qemu-devel] [Qemu-ppc] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Greg Kurz 6 years, 7 months ago
On Thu, 4 Jul 2019 10:49:05 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> 
> 
> On Thursday 04 July 2019 06:42 AM, David Gibson wrote:
> > On Wed, Jul 03, 2019 at 02:30:31PM +0530, Aravinda Prasad wrote:
> >>
> >>
> >> On Wednesday 03 July 2019 08:50 AM, David Gibson wrote:
> >>> On Tue, Jul 02, 2019 at 04:10:08PM +0530, Aravinda Prasad wrote:
> >>>>
> >>>>
> >>>> On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
> >>>>> On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
> >>>>>> This patch adds support in QEMU to handle "ibm,nmi-register"
> >>>>>> and "ibm,nmi-interlock" RTAS calls and sets the default
> >>>>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> >>>>>> type 4.0.
> >>>>>>
> >>>>>> The machine check notification address is saved when the
> >>>>>> OS issues "ibm,nmi-register" RTAS call.
> >>>>>>
> >>>>>> This patch also handles the case when multiple processors
> >>>>>> experience machine check at or about the same time by
> >>>>>> handling "ibm,nmi-interlock" call. In such cases, as per
> >>>>>> PAPR, subsequent processors serialize waiting for the first
> >>>>>> processor to issue the "ibm,nmi-interlock" call. The second
> >>>>>> processor that also received a machine check error waits
> >>>>>> till the first processor is done reading the error log.
> >>>>>> The first processor issues "ibm,nmi-interlock" call
> >>>>>> when the error log is consumed.
> >>>>>>
> >>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>>>> ---
> >>>>>>  hw/ppc/spapr.c         |    6 ++++-
> >>>>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>>>  include/hw/ppc/spapr.h |    5 +++-
> >>>>>>  3 files changed, 72 insertions(+), 2 deletions(-)
> >>>>>>
> >>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>>>> index 3d6d139..213d493 100644
> >>>>>> --- a/hw/ppc/spapr.c
> >>>>>> +++ b/hw/ppc/spapr.c
> >>>>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
> >>>>>>          /* Create the error string for live migration blocker */
> >>>>>>          error_setg(&spapr->fwnmi_migration_blocker,
> >>>>>>                  "Live migration not supported during machine check handling");
> >>>>>> +
> >>>>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> >>>>>> +        spapr_fwnmi_register();
> >>>>>>      }
> >>>>>>  
> >>>>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>>>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >>>>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> >>>>>
> >>>>> Turning this on by default really isn't ok if it stops you running TCG
> >>>>> guests at all.
> >>>>
> >>>> If so this can be "off" by default until TCG is supported.
> >>>>
> >>>>>
> >>>>>>      spapr_caps_add_properties(smc, &error_abort);
> >>>>>>      smc->irq = &spapr_irq_dual;
> >>>>>>      smc->dr_phb_enabled = true;
> >>>>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> >>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> >>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>>
> >>>>> We're now well past 4.0, and in fact we're about to go into soft
> >>>>> freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
> >>>>> will need to retain the old default.
> >>>>
> >>>> ok.
> >>>>
> >>>>>
> >>>>>>  }
> >>>>>>  
> >>>>>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
> >>>>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >>>>>> index a015a80..e010cb2 100644
> >>>>>> --- a/hw/ppc/spapr_rtas.c
> >>>>>> +++ b/hw/ppc/spapr_rtas.c
> >>>>>> @@ -49,6 +49,7 @@
> >>>>>>  #include "hw/ppc/fdt.h"
> >>>>>>  #include "target/ppc/mmu-hash64.h"
> >>>>>>  #include "target/ppc/mmu-book3s-v3.h"
> >>>>>> +#include "migration/blocker.h"
> >>>>>>  
> >>>>>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>>>>>                                     uint32_t token, uint32_t nargs,
> >>>>>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>>>>>      rtas_st(rets, 1, 100);
> >>>>>>  }
> >>>>>>  
> >>>>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> >>>>>> +                                  SpaprMachineState *spapr,
> >>>>>> +                                  uint32_t token, uint32_t nargs,
> >>>>>> +                                  target_ulong args,
> >>>>>> +                                  uint32_t nret, target_ulong rets)
> >>>>>> +{
> >>>>>> +    int ret;
> >>>>>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
> >>>>>> +
> >>>>>> +    if (!rtas_addr) {
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>> +        return;
> >>>>>> +    }
> >>>>>> +
> >>>>>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>> +        return;
> >>>>>> +    }
> >>>>>> +
> >>>>>> +    ret = kvmppc_fwnmi_enable(cpu);
> >>>>>> +    if (ret == 1) {
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>
> >>>>> I don't understand this case separate from the others.  We've already
> >>>>> set the cap, so fwnmi support should be checked and available.
> >>>>
> >>>> But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
> >>>> cap_ppc_fwnmi is not available in KVM.
> >>>
> >>> But you've checked for the presence of the extension, yes?  So a
> >>> failure to enable the cap would be unexpected.  In which case how does
> >>> this case differ from.. 
> >>
> >> No, this is the function where I check for the presence of the
> >> extension. In kvm_arch_init() we just set cap_ppc_fwnmi to 1 if KVM
> >> support is available, but don't take any action if unavailable.
> > 
> > Yeah, that's not ok.  You should be checking for the presence of the
> > extension in the .apply() function.  If you start up with the spapr
> > cap selected then failing at nmi-register time means something has
> > gone badly wrong.
> 
> So, I should check for two things in the .apply() function: first if
> cap_ppc_fwnmi is supported and second if cap_ppc_fwnmi is enabled in KVM.
> 
> In that case kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0) should be
> called during spapr_machine_init().
> 
> So, we will fail to boot (when SPAPR_CAP_FWNMI_MCE=ON) if cap_ppc_fwnmi
> can't be enabled irrespective of whether a guest issues nmi,register or not.
> 

Yes. The idea is that we don't want to expose some feature to the guest
if we already know it cannot work. The same stands for migration, if
we know the destination host doesn't support the feature, we don't want
to confuse the guest because the feature disappeared unexpectedly. The
correct way to address that is to check the feature is supported in QEMU
and/or KVM before we start the guest or before we accept a migration
stream and fail early if we can't provide the feature.

Speaking of migration, kvmppc_fwnmi_enable() should be called in a
post load callback... and the problem I see is that this is a vCPU
ioctl. So either we add a state "I should enable FWNMI on post load"
to the vCPU that called nmi,register, or maybe first_cpu can do the
trick in a post load callback of vmstate_spapr_machine_check.

BTW, since FWNMI is a platform wide feature, ie. you don't need to
enable it on a per-CPU basis), why is KVM_CAP_PPC_FWNMI a vCPU ioctl
and not a VM ioctl ?

> > 
> > This is necessary for migration: if you start on a system with nmi
> > support and the guest registers for it, you can't then migrate safely
> > to a system that doesn't have nmi support.  The way to handle that
> > case is to have qemu fail to even start up on a destination without
> > the support.
> > 
> >> So this case is when we are running an old version of KVM with no
> >> cap_ppc_fwnmi support.
> >>
> >>>
> >>>>
> >>>>>
> >>>>>> +        return;
> >>>>>> +    } else if (ret < 0) {
> >>>>>> +        error_report("Couldn't enable KVM FWNMI capability");
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> >>>>>> +        return;
> >>>
> >>> ..this case.
> >>
> >> And this is when we have the KVM support but due to some problem with
> >> either KVM or QEMU we are unable to enable cap_ppc_fwnmi.
> >>
> >>>
> >>>>>> +    }
> >>>>>> +
> >>>>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> >>>>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> >>>>>> +                                   SpaprMachineState *spapr,
> >>>>>> +                                   uint32_t token, uint32_t nargs,
> >>>>>> +                                   target_ulong args,
> >>>>>> +                                   uint32_t nret, target_ulong rets)
> >>>>>> +{
> >>>>>> +    if (spapr->guest_machine_check_addr == -1) {
> >>>>>> +        /* NMI register not called */
> >>>>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> >>>>>> +    } else {
> >>>>>> +        /*
> >>>>>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
> >>>>>> +         * hence unset mc_status.
> >>>>>> +         */
> >>>>>> +        spapr->mc_status = -1;
> >>>>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> >>>>>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> >>>>>
> >>>>> Hrm.  We add the blocker at the mce request point.  First, that's in
> >>>>> another patch, which isn't great.  Second, does that mean we could add
> >>>>> multiple times if we get an MCE on multiple CPUs?  Will that work and
> >>>>> correctly match adds and removes properly?
> >>>>
> >>>> If it is fine to move the migration patch as the last patch in the
> >>>> sequence, then we will have add and del blocker in the same patch.
> >>>>
> >>>> And yes we could add multiple times if we get MCE on multiple CPUs and
> >>>> as all those cpus call interlock there should be matching number of
> >>>> delete blockers.
> >>>
> >>> Ok, and I think adding the same pointer to the list multiple times
> >>> will work ok.
> >>
> >> I think so
> >>
> >>>
> >>> Btw, add_blocker() can fail - have you handled failure conditions?
> >>
> >> yes, I am handling it.
> >>
> >>>
> >>
> > 
> 


Re: [Qemu-devel] [Qemu-ppc] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Aravinda Prasad 6 years, 7 months ago

On Friday 05 July 2019 12:07 AM, Greg Kurz wrote:
> On Thu, 4 Jul 2019 10:49:05 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>>
>>
>> On Thursday 04 July 2019 06:42 AM, David Gibson wrote:
>>> On Wed, Jul 03, 2019 at 02:30:31PM +0530, Aravinda Prasad wrote:
>>>>
>>>>
>>>> On Wednesday 03 July 2019 08:50 AM, David Gibson wrote:
>>>>> On Tue, Jul 02, 2019 at 04:10:08PM +0530, Aravinda Prasad wrote:
>>>>>>
>>>>>>
>>>>>> On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
>>>>>>> On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
>>>>>>>> This patch adds support in QEMU to handle "ibm,nmi-register"
>>>>>>>> and "ibm,nmi-interlock" RTAS calls and sets the default
>>>>>>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
>>>>>>>> type 4.0.
>>>>>>>>
>>>>>>>> The machine check notification address is saved when the
>>>>>>>> OS issues "ibm,nmi-register" RTAS call.
>>>>>>>>
>>>>>>>> This patch also handles the case when multiple processors
>>>>>>>> experience machine check at or about the same time by
>>>>>>>> handling "ibm,nmi-interlock" call. In such cases, as per
>>>>>>>> PAPR, subsequent processors serialize waiting for the first
>>>>>>>> processor to issue the "ibm,nmi-interlock" call. The second
>>>>>>>> processor that also received a machine check error waits
>>>>>>>> till the first processor is done reading the error log.
>>>>>>>> The first processor issues "ibm,nmi-interlock" call
>>>>>>>> when the error log is consumed.
>>>>>>>>
>>>>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>>>>>> ---
>>>>>>>>  hw/ppc/spapr.c         |    6 ++++-
>>>>>>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>  include/hw/ppc/spapr.h |    5 +++-
>>>>>>>>  3 files changed, 72 insertions(+), 2 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>>>>>> index 3d6d139..213d493 100644
>>>>>>>> --- a/hw/ppc/spapr.c
>>>>>>>> +++ b/hw/ppc/spapr.c
>>>>>>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
>>>>>>>>          /* Create the error string for live migration blocker */
>>>>>>>>          error_setg(&spapr->fwnmi_migration_blocker,
>>>>>>>>                  "Live migration not supported during machine check handling");
>>>>>>>> +
>>>>>>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>>>>>>>> +        spapr_fwnmi_register();
>>>>>>>>      }
>>>>>>>>  
>>>>>>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>>>>>>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>>>>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
>>>>>>>
>>>>>>> Turning this on by default really isn't ok if it stops you running TCG
>>>>>>> guests at all.
>>>>>>
>>>>>> If so this can be "off" by default until TCG is supported.
>>>>>>
>>>>>>>
>>>>>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>>>>>      smc->irq = &spapr_irq_dual;
>>>>>>>>      smc->dr_phb_enabled = true;
>>>>>>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>>>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
>>>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>>>>>
>>>>>>> We're now well past 4.0, and in fact we're about to go into soft
>>>>>>> freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
>>>>>>> will need to retain the old default.
>>>>>>
>>>>>> ok.
>>>>>>
>>>>>>>
>>>>>>>>  }
>>>>>>>>  
>>>>>>>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
>>>>>>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>>>>>>>> index a015a80..e010cb2 100644
>>>>>>>> --- a/hw/ppc/spapr_rtas.c
>>>>>>>> +++ b/hw/ppc/spapr_rtas.c
>>>>>>>> @@ -49,6 +49,7 @@
>>>>>>>>  #include "hw/ppc/fdt.h"
>>>>>>>>  #include "target/ppc/mmu-hash64.h"
>>>>>>>>  #include "target/ppc/mmu-book3s-v3.h"
>>>>>>>> +#include "migration/blocker.h"
>>>>>>>>  
>>>>>>>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>>>>>>                                     uint32_t token, uint32_t nargs,
>>>>>>>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>>>>>>      rtas_st(rets, 1, 100);
>>>>>>>>  }
>>>>>>>>  
>>>>>>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>>>>>>>> +                                  SpaprMachineState *spapr,
>>>>>>>> +                                  uint32_t token, uint32_t nargs,
>>>>>>>> +                                  target_ulong args,
>>>>>>>> +                                  uint32_t nret, target_ulong rets)
>>>>>>>> +{
>>>>>>>> +    int ret;
>>>>>>>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
>>>>>>>> +
>>>>>>>> +    if (!rtas_addr) {
>>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>>>>>> +        return;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
>>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>>>>>> +        return;
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    ret = kvmppc_fwnmi_enable(cpu);
>>>>>>>> +    if (ret == 1) {
>>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>>>>>
>>>>>>> I don't understand this case separate from the others.  We've already
>>>>>>> set the cap, so fwnmi support should be checked and available.
>>>>>>
>>>>>> But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
>>>>>> cap_ppc_fwnmi is not available in KVM.
>>>>>
>>>>> But you've checked for the presence of the extension, yes?  So a
>>>>> failure to enable the cap would be unexpected.  In which case how does
>>>>> this case differ from.. 
>>>>
>>>> No, this is the function where I check for the presence of the
>>>> extension. In kvm_arch_init() we just set cap_ppc_fwnmi to 1 if KVM
>>>> support is available, but don't take any action if unavailable.
>>>
>>> Yeah, that's not ok.  You should be checking for the presence of the
>>> extension in the .apply() function.  If you start up with the spapr
>>> cap selected then failing at nmi-register time means something has
>>> gone badly wrong.
>>
>> So, I should check for two things in the .apply() function: first if
>> cap_ppc_fwnmi is supported and second if cap_ppc_fwnmi is enabled in KVM.
>>
>> In that case kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0) should be
>> called during spapr_machine_init().
>>
>> So, we will fail to boot (when SPAPR_CAP_FWNMI_MCE=ON) if cap_ppc_fwnmi
>> can't be enabled irrespective of whether a guest issues nmi,register or not.
>>
> 
> Yes. The idea is that we don't want to expose some feature to the guest
> if we already know it cannot work. The same stands for migration, if
> we know the destination host doesn't support the feature, we don't want
> to confuse the guest because the feature disappeared unexpectedly. The
> correct way to address that is to check the feature is supported in QEMU
> and/or KVM before we start the guest or before we accept a migration
> stream and fail early if we can't provide the feature.

ok.

> 
> Speaking of migration, kvmppc_fwnmi_enable() should be called in a
> post load callback... and the problem I see is that this is a vCPU
> ioctl. So either we add a state "I should enable FWNMI on post load"
> to the vCPU that called nmi,register, or maybe first_cpu can do the
> trick in a post load callback of vmstate_spapr_machine_check.

To summarize the changes I am planning:

If SPAPR_CAP_FWNMI_MCE=ON, then enable cap_ppc_fwnmi in KVM. If
cap_ppc_fwnmi can't be enabled then fail early. This can be included in
spapr_machine_init() function.

During migration, if SPAPR_CAP_FWNMI_MCE=ON then enable cap_ppc_fwnmi by
having a .post_load callback which calls kvmppc_fwnmi_enable(). Fail the
migration if we can't enable cap_ppc_fwnmi on the destination host.

> 
> BTW, since FWNMI is a platform wide feature, ie. you don't need to
> enable it on a per-CPU basis), why is KVM_CAP_PPC_FWNMI a vCPU ioctl
> and not a VM ioctl ?

I think it should be VM ioctl, let me check.

Regards,
Aravinda

> 
>>>
>>> This is necessary for migration: if you start on a system with nmi
>>> support and the guest registers for it, you can't then migrate safely
>>> to a system that doesn't have nmi support.  The way to handle that
>>> case is to have qemu fail to even start up on a destination without
>>> the support.
>>>
>>>> So this case is when we are running an old version of KVM with no
>>>> cap_ppc_fwnmi support.
>>>>
>>>>>
>>>>>>
>>>>>>>
>>>>>>>> +        return;
>>>>>>>> +    } else if (ret < 0) {
>>>>>>>> +        error_report("Couldn't enable KVM FWNMI capability");
>>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>>>>>>>> +        return;
>>>>>
>>>>> ..this case.
>>>>
>>>> And this is when we have the KVM support but due to some problem with
>>>> either KVM or QEMU we are unable to enable cap_ppc_fwnmi.
>>>>
>>>>>
>>>>>>>> +    }
>>>>>>>> +
>>>>>>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>>>>>>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>>>>>>>> +                                   SpaprMachineState *spapr,
>>>>>>>> +                                   uint32_t token, uint32_t nargs,
>>>>>>>> +                                   target_ulong args,
>>>>>>>> +                                   uint32_t nret, target_ulong rets)
>>>>>>>> +{
>>>>>>>> +    if (spapr->guest_machine_check_addr == -1) {
>>>>>>>> +        /* NMI register not called */
>>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>>>>>>>> +    } else {
>>>>>>>> +        /*
>>>>>>>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
>>>>>>>> +         * hence unset mc_status.
>>>>>>>> +         */
>>>>>>>> +        spapr->mc_status = -1;
>>>>>>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>>>>>>>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
>>>>>>>
>>>>>>> Hrm.  We add the blocker at the mce request point.  First, that's in
>>>>>>> another patch, which isn't great.  Second, does that mean we could add
>>>>>>> multiple times if we get an MCE on multiple CPUs?  Will that work and
>>>>>>> correctly match adds and removes properly?
>>>>>>
>>>>>> If it is fine to move the migration patch as the last patch in the
>>>>>> sequence, then we will have add and del blocker in the same patch.
>>>>>>
>>>>>> And yes we could add multiple times if we get MCE on multiple CPUs and
>>>>>> as all those cpus call interlock there should be matching number of
>>>>>> delete blockers.
>>>>>
>>>>> Ok, and I think adding the same pointer to the list multiple times
>>>>> will work ok.
>>>>
>>>> I think so
>>>>
>>>>>
>>>>> Btw, add_blocker() can fail - have you handled failure conditions?
>>>>
>>>> yes, I am handling it.
>>>>
>>>>>
>>>>
>>>
>>
> 

-- 
Regards,
Aravinda


Re: [Qemu-devel] [Qemu-ppc] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Greg Kurz 6 years, 7 months ago
On Fri, 5 Jul 2019 16:41:25 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> 
> 
> On Friday 05 July 2019 12:07 AM, Greg Kurz wrote:
> > On Thu, 4 Jul 2019 10:49:05 +0530
> > Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> > 
> >>
> >>
> >> On Thursday 04 July 2019 06:42 AM, David Gibson wrote:
> >>> On Wed, Jul 03, 2019 at 02:30:31PM +0530, Aravinda Prasad wrote:
> >>>>
> >>>>
> >>>> On Wednesday 03 July 2019 08:50 AM, David Gibson wrote:
> >>>>> On Tue, Jul 02, 2019 at 04:10:08PM +0530, Aravinda Prasad wrote:
> >>>>>>
> >>>>>>
> >>>>>> On Tuesday 02 July 2019 09:41 AM, David Gibson wrote:
> >>>>>>> On Wed, Jun 12, 2019 at 02:51:38PM +0530, Aravinda Prasad wrote:
> >>>>>>>> This patch adds support in QEMU to handle "ibm,nmi-register"
> >>>>>>>> and "ibm,nmi-interlock" RTAS calls and sets the default
> >>>>>>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> >>>>>>>> type 4.0.
> >>>>>>>>
> >>>>>>>> The machine check notification address is saved when the
> >>>>>>>> OS issues "ibm,nmi-register" RTAS call.
> >>>>>>>>
> >>>>>>>> This patch also handles the case when multiple processors
> >>>>>>>> experience machine check at or about the same time by
> >>>>>>>> handling "ibm,nmi-interlock" call. In such cases, as per
> >>>>>>>> PAPR, subsequent processors serialize waiting for the first
> >>>>>>>> processor to issue the "ibm,nmi-interlock" call. The second
> >>>>>>>> processor that also received a machine check error waits
> >>>>>>>> till the first processor is done reading the error log.
> >>>>>>>> The first processor issues "ibm,nmi-interlock" call
> >>>>>>>> when the error log is consumed.
> >>>>>>>>
> >>>>>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>>>>>> ---
> >>>>>>>>  hw/ppc/spapr.c         |    6 ++++-
> >>>>>>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>>>>>  include/hw/ppc/spapr.h |    5 +++-
> >>>>>>>>  3 files changed, 72 insertions(+), 2 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>>>>>> index 3d6d139..213d493 100644
> >>>>>>>> --- a/hw/ppc/spapr.c
> >>>>>>>> +++ b/hw/ppc/spapr.c
> >>>>>>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
> >>>>>>>>          /* Create the error string for live migration blocker */
> >>>>>>>>          error_setg(&spapr->fwnmi_migration_blocker,
> >>>>>>>>                  "Live migration not supported during machine check handling");
> >>>>>>>> +
> >>>>>>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> >>>>>>>> +        spapr_fwnmi_register();
> >>>>>>>>      }
> >>>>>>>>  
> >>>>>>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>>>>>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >>>>>>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> >>>>>>>
> >>>>>>> Turning this on by default really isn't ok if it stops you running TCG
> >>>>>>> guests at all.
> >>>>>>
> >>>>>> If so this can be "off" by default until TCG is supported.
> >>>>>>
> >>>>>>>
> >>>>>>>>      spapr_caps_add_properties(smc, &error_abort);
> >>>>>>>>      smc->irq = &spapr_irq_dual;
> >>>>>>>>      smc->dr_phb_enabled = true;
> >>>>>>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> >>>>>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> >>>>>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>>>>>
> >>>>>>> We're now well past 4.0, and in fact we're about to go into soft
> >>>>>>> freeze for 4.1, so we're going to miss that too.  So 4.1 and earlier
> >>>>>>> will need to retain the old default.
> >>>>>>
> >>>>>> ok.
> >>>>>>
> >>>>>>>
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
> >>>>>>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >>>>>>>> index a015a80..e010cb2 100644
> >>>>>>>> --- a/hw/ppc/spapr_rtas.c
> >>>>>>>> +++ b/hw/ppc/spapr_rtas.c
> >>>>>>>> @@ -49,6 +49,7 @@
> >>>>>>>>  #include "hw/ppc/fdt.h"
> >>>>>>>>  #include "target/ppc/mmu-hash64.h"
> >>>>>>>>  #include "target/ppc/mmu-book3s-v3.h"
> >>>>>>>> +#include "migration/blocker.h"
> >>>>>>>>  
> >>>>>>>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>>>>>>>                                     uint32_t token, uint32_t nargs,
> >>>>>>>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>>>>>>>      rtas_st(rets, 1, 100);
> >>>>>>>>  }
> >>>>>>>>  
> >>>>>>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> >>>>>>>> +                                  SpaprMachineState *spapr,
> >>>>>>>> +                                  uint32_t token, uint32_t nargs,
> >>>>>>>> +                                  target_ulong args,
> >>>>>>>> +                                  uint32_t nret, target_ulong rets)
> >>>>>>>> +{
> >>>>>>>> +    int ret;
> >>>>>>>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
> >>>>>>>> +
> >>>>>>>> +    if (!rtas_addr) {
> >>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>>>> +        return;
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
> >>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>>>> +        return;
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>> +    ret = kvmppc_fwnmi_enable(cpu);
> >>>>>>>> +    if (ret == 1) {
> >>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >>>>>>>
> >>>>>>> I don't understand this case separate from the others.  We've already
> >>>>>>> set the cap, so fwnmi support should be checked and available.
> >>>>>>
> >>>>>> But we have not enabled fwnmi in KVM. kvmppc_fwnmi_enable() returns 1 if
> >>>>>> cap_ppc_fwnmi is not available in KVM.
> >>>>>
> >>>>> But you've checked for the presence of the extension, yes?  So a
> >>>>> failure to enable the cap would be unexpected.  In which case how does
> >>>>> this case differ from.. 
> >>>>
> >>>> No, this is the function where I check for the presence of the
> >>>> extension. In kvm_arch_init() we just set cap_ppc_fwnmi to 1 if KVM
> >>>> support is available, but don't take any action if unavailable.
> >>>
> >>> Yeah, that's not ok.  You should be checking for the presence of the
> >>> extension in the .apply() function.  If you start up with the spapr
> >>> cap selected then failing at nmi-register time means something has
> >>> gone badly wrong.
> >>
> >> So, I should check for two things in the .apply() function: first if
> >> cap_ppc_fwnmi is supported and second if cap_ppc_fwnmi is enabled in KVM.
> >>
> >> In that case kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0) should be
> >> called during spapr_machine_init().
> >>
> >> So, we will fail to boot (when SPAPR_CAP_FWNMI_MCE=ON) if cap_ppc_fwnmi
> >> can't be enabled irrespective of whether a guest issues nmi,register or not.
> >>
> > 
> > Yes. The idea is that we don't want to expose some feature to the guest
> > if we already know it cannot work. The same stands for migration, if
> > we know the destination host doesn't support the feature, we don't want
> > to confuse the guest because the feature disappeared unexpectedly. The
> > correct way to address that is to check the feature is supported in QEMU
> > and/or KVM before we start the guest or before we accept a migration
> > stream and fail early if we can't provide the feature.
> 
> ok.
> 
> > 
> > Speaking of migration, kvmppc_fwnmi_enable() should be called in a
> > post load callback... and the problem I see is that this is a vCPU
> > ioctl. So either we add a state "I should enable FWNMI on post load"
> > to the vCPU that called nmi,register, or maybe first_cpu can do the
> > trick in a post load callback of vmstate_spapr_machine_check.
> 
> To summarize the changes I am planning:
> 
> If SPAPR_CAP_FWNMI_MCE=ON, then enable cap_ppc_fwnmi in KVM. If
> cap_ppc_fwnmi can't be enabled then fail early. This can be included in
> spapr_machine_init() function.
> 
> During migration, if SPAPR_CAP_FWNMI_MCE=ON then enable cap_ppc_fwnmi by
> having a .post_load callback which calls kvmppc_fwnmi_enable(). Fail the
> migration if we can't enable cap_ppc_fwnmi on the destination host.
> 

LGTM

> > 
> > BTW, since FWNMI is a platform wide feature, ie. you don't need to
> > enable it on a per-CPU basis), why is KVM_CAP_PPC_FWNMI a vCPU ioctl
> > and not a VM ioctl ?
> 
> I think it should be VM ioctl, let me check.
> 

It is indeed described as a VM ioctl in Documentation/virtual/kvm/api.txt
but...

static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
				     struct kvm_enable_cap *cap)
{
[...]
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
	case KVM_CAP_PPC_FWNMI:
		r = -EINVAL;
		if (!is_kvmppc_hv_enabled(vcpu->kvm))
			break;
		r = 0;
		vcpu->kvm->arch.fwnmi_enabled = true;
		break;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
[...]
}

I guess it should have been rather added to kvm_vm_ioctl_enable_cap(),
but since this was released in 4.13, I'm afraid we'll need to support
this variant in QEMU anyway.

> Regards,
> Aravinda
> 
> > 
> >>>
> >>> This is necessary for migration: if you start on a system with nmi
> >>> support and the guest registers for it, you can't then migrate safely
> >>> to a system that doesn't have nmi support.  The way to handle that
> >>> case is to have qemu fail to even start up on a destination without
> >>> the support.
> >>>
> >>>> So this case is when we are running an old version of KVM with no
> >>>> cap_ppc_fwnmi support.
> >>>>
> >>>>>
> >>>>>>
> >>>>>>>
> >>>>>>>> +        return;
> >>>>>>>> +    } else if (ret < 0) {
> >>>>>>>> +        error_report("Couldn't enable KVM FWNMI capability");
> >>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> >>>>>>>> +        return;
> >>>>>
> >>>>> ..this case.
> >>>>
> >>>> And this is when we have the KVM support but due to some problem with
> >>>> either KVM or QEMU we are unable to enable cap_ppc_fwnmi.
> >>>>
> >>>>>
> >>>>>>>> +    }
> >>>>>>>> +
> >>>>>>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> >>>>>>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >>>>>>>> +}
> >>>>>>>> +
> >>>>>>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> >>>>>>>> +                                   SpaprMachineState *spapr,
> >>>>>>>> +                                   uint32_t token, uint32_t nargs,
> >>>>>>>> +                                   target_ulong args,
> >>>>>>>> +                                   uint32_t nret, target_ulong rets)
> >>>>>>>> +{
> >>>>>>>> +    if (spapr->guest_machine_check_addr == -1) {
> >>>>>>>> +        /* NMI register not called */
> >>>>>>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> >>>>>>>> +    } else {
> >>>>>>>> +        /*
> >>>>>>>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
> >>>>>>>> +         * hence unset mc_status.
> >>>>>>>> +         */
> >>>>>>>> +        spapr->mc_status = -1;
> >>>>>>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> >>>>>>>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> >>>>>>>
> >>>>>>> Hrm.  We add the blocker at the mce request point.  First, that's in
> >>>>>>> another patch, which isn't great.  Second, does that mean we could add
> >>>>>>> multiple times if we get an MCE on multiple CPUs?  Will that work and
> >>>>>>> correctly match adds and removes properly?
> >>>>>>
> >>>>>> If it is fine to move the migration patch as the last patch in the
> >>>>>> sequence, then we will have add and del blocker in the same patch.
> >>>>>>
> >>>>>> And yes we could add multiple times if we get MCE on multiple CPUs and
> >>>>>> as all those cpus call interlock there should be matching number of
> >>>>>> delete blockers.
> >>>>>
> >>>>> Ok, and I think adding the same pointer to the list multiple times
> >>>>> will work ok.
> >>>>
> >>>> I think so
> >>>>
> >>>>>
> >>>>> Btw, add_blocker() can fail - have you handled failure conditions?
> >>>>
> >>>> yes, I am handling it.
> >>>>
> >>>>>
> >>>>
> >>>
> >>
> > 
> 


Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Greg Kurz 6 years, 7 months ago
On Wed, 12 Jun 2019 14:51:38 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> This patch adds support in QEMU to handle "ibm,nmi-register"
> and "ibm,nmi-interlock" RTAS calls and sets the default
> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> type 4.0.
> 

Next machine type is 4.1.

> The machine check notification address is saved when the
> OS issues "ibm,nmi-register" RTAS call.
> 
> This patch also handles the case when multiple processors
> experience machine check at or about the same time by
> handling "ibm,nmi-interlock" call. In such cases, as per
> PAPR, subsequent processors serialize waiting for the first
> processor to issue the "ibm,nmi-interlock" call. The second
> processor that also received a machine check error waits
> till the first processor is done reading the error log.
> The first processor issues "ibm,nmi-interlock" call
> when the error log is consumed.
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr.c         |    6 ++++-
>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    5 +++-
>  3 files changed, 72 insertions(+), 2 deletions(-)
> 
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index 3d6d139..213d493 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
>          /* Create the error string for live migration blocker */
>          error_setg(&spapr->fwnmi_migration_blocker,
>                  "Live migration not supported during machine check handling");
> +
> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> +        spapr_fwnmi_register();

IIRC this was supposed to depend on SPAPR_CAP_FWNMI_MCE being ON.

>      }
>  
>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
>      spapr_caps_add_properties(smc, &error_abort);
>      smc->irq = &spapr_irq_dual;
>      smc->dr_phb_enabled = true;
> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;

This should have been put into spapr_machine_4_0_class_options().

But unless you manage to get this merged before soft-freeze (2019-07-02),
I'm afraid this will be a 4.2 feature.

>  }
>  
>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> index a015a80..e010cb2 100644
> --- a/hw/ppc/spapr_rtas.c
> +++ b/hw/ppc/spapr_rtas.c
> @@ -49,6 +49,7 @@
>  #include "hw/ppc/fdt.h"
>  #include "target/ppc/mmu-hash64.h"
>  #include "target/ppc/mmu-book3s-v3.h"
> +#include "migration/blocker.h"
>  
>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>                                     uint32_t token, uint32_t nargs,
> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>      rtas_st(rets, 1, 100);
>  }
>  
> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> +                                  SpaprMachineState *spapr,
> +                                  uint32_t token, uint32_t nargs,
> +                                  target_ulong args,
> +                                  uint32_t nret, target_ulong rets)
> +{
> +    int ret;
> +    hwaddr rtas_addr = spapr_get_rtas_addr();
> +
> +    if (!rtas_addr) {
> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +        return;
> +    }
> +
> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +        return;
> +    }
> +
> +    ret = kvmppc_fwnmi_enable(cpu);
> +    if (ret == 1) {
> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> +        return;
> +    } else if (ret < 0) {
> +        error_report("Couldn't enable KVM FWNMI capability");
> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> +        return;
> +    }
> +
> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +}
> +
> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> +                                   SpaprMachineState *spapr,
> +                                   uint32_t token, uint32_t nargs,
> +                                   target_ulong args,
> +                                   uint32_t nret, target_ulong rets)
> +{
> +    if (spapr->guest_machine_check_addr == -1) {
> +        /* NMI register not called */
> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> +    } else {
> +        /*
> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
> +         * hence unset mc_status.
> +         */
> +        spapr->mc_status = -1;
> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> +    }
> +}
> +
>  static struct rtas_call {
>      const char *name;
>      spapr_rtas_fn fn;
> @@ -496,6 +551,14 @@ hwaddr spapr_get_rtas_addr(void)
>      return (hwaddr)fdt32_to_cpu(*rtas_data);
>  }
>  
> +void spapr_fwnmi_register(void)
> +{
> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
> +                        rtas_ibm_nmi_register);
> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
> +                        rtas_ibm_nmi_interlock);
> +}
> +
>  static void core_rtas_register_types(void)
>  {
>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 0dedf0a..7ae53e2 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -637,8 +637,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>  
> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>  
>  /* RTAS ibm,get-system-parameter token values */
>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
> @@ -894,4 +896,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>  
>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>  hwaddr spapr_get_rtas_addr(void);
> +void spapr_fwnmi_register(void);
>  #endif /* HW_SPAPR_H */
> 


Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Aravinda Prasad 6 years, 7 months ago

On Monday 24 June 2019 07:59 PM, Greg Kurz wrote:
> On Wed, 12 Jun 2019 14:51:38 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>> This patch adds support in QEMU to handle "ibm,nmi-register"
>> and "ibm,nmi-interlock" RTAS calls and sets the default
>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
>> type 4.0.
>>
> 
> Next machine type is 4.1.

ok.

> 
>> The machine check notification address is saved when the
>> OS issues "ibm,nmi-register" RTAS call.
>>
>> This patch also handles the case when multiple processors
>> experience machine check at or about the same time by
>> handling "ibm,nmi-interlock" call. In such cases, as per
>> PAPR, subsequent processors serialize waiting for the first
>> processor to issue the "ibm,nmi-interlock" call. The second
>> processor that also received a machine check error waits
>> till the first processor is done reading the error log.
>> The first processor issues "ibm,nmi-interlock" call
>> when the error log is consumed.
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr.c         |    6 ++++-
>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    5 +++-
>>  3 files changed, 72 insertions(+), 2 deletions(-)
>>
>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>> index 3d6d139..213d493 100644
>> --- a/hw/ppc/spapr.c
>> +++ b/hw/ppc/spapr.c
>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
>>          /* Create the error string for live migration blocker */
>>          error_setg(&spapr->fwnmi_migration_blocker,
>>                  "Live migration not supported during machine check handling");
>> +
>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>> +        spapr_fwnmi_register();
> 
> IIRC this was supposed to depend on SPAPR_CAP_FWNMI_MCE being ON.

Yes this is inside SPAPR_CAP_FWNMI_MCE check:

if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
    /*
     * Ensure that the rtas image size is less than RTAS_ERROR_LOG_OFFSET
     * or else the rtas image will be overwritten with the rtas error log
     * when a machine check exception is encountered.
     */
    g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);

    /* Resize rtas blob to accommodate error log */
    spapr->rtas_size = RTAS_ERROR_LOG_MAX;

    /* Create the error string for live migration blocker */
    error_setg(&spapr->fwnmi_migration_blocker,
            "Live migration not supported during machine check handling");

    /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
    spapr_fwnmi_register();
}


> 
>>      }
>>  
>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
>>      spapr_caps_add_properties(smc, &error_abort);
>>      smc->irq = &spapr_irq_dual;
>>      smc->dr_phb_enabled = true;
>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> 
> This should have been put into spapr_machine_4_0_class_options().

ok. I will change it.

> 
> But unless you manage to get this merged before soft-freeze (2019-07-02),
> I'm afraid this will be a 4.2 feature.

If there are no other comments, can this be merged to 4.1? I will send a
revised version with the above changes.

Regards,
Aravinda

> 
>>  }
>>  
>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>> index a015a80..e010cb2 100644
>> --- a/hw/ppc/spapr_rtas.c
>> +++ b/hw/ppc/spapr_rtas.c
>> @@ -49,6 +49,7 @@
>>  #include "hw/ppc/fdt.h"
>>  #include "target/ppc/mmu-hash64.h"
>>  #include "target/ppc/mmu-book3s-v3.h"
>> +#include "migration/blocker.h"
>>  
>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>                                     uint32_t token, uint32_t nargs,
>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>      rtas_st(rets, 1, 100);
>>  }
>>  
>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>> +                                  SpaprMachineState *spapr,
>> +                                  uint32_t token, uint32_t nargs,
>> +                                  target_ulong args,
>> +                                  uint32_t nret, target_ulong rets)
>> +{
>> +    int ret;
>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
>> +
>> +    if (!rtas_addr) {
>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> +        return;
>> +    }
>> +
>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> +        return;
>> +    }
>> +
>> +    ret = kvmppc_fwnmi_enable(cpu);
>> +    if (ret == 1) {
>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>> +        return;
>> +    } else if (ret < 0) {
>> +        error_report("Couldn't enable KVM FWNMI capability");
>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>> +        return;
>> +    }
>> +
>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +}
>> +
>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>> +                                   SpaprMachineState *spapr,
>> +                                   uint32_t token, uint32_t nargs,
>> +                                   target_ulong args,
>> +                                   uint32_t nret, target_ulong rets)
>> +{
>> +    if (spapr->guest_machine_check_addr == -1) {
>> +        /* NMI register not called */
>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>> +    } else {
>> +        /*
>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
>> +         * hence unset mc_status.
>> +         */
>> +        spapr->mc_status = -1;
>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
>> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>> +    }
>> +}
>> +
>>  static struct rtas_call {
>>      const char *name;
>>      spapr_rtas_fn fn;
>> @@ -496,6 +551,14 @@ hwaddr spapr_get_rtas_addr(void)
>>      return (hwaddr)fdt32_to_cpu(*rtas_data);
>>  }
>>  
>> +void spapr_fwnmi_register(void)
>> +{
>> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
>> +                        rtas_ibm_nmi_register);
>> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
>> +                        rtas_ibm_nmi_interlock);
>> +}
>> +
>>  static void core_rtas_register_types(void)
>>  {
>>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index 0dedf0a..7ae53e2 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -637,8 +637,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
>> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>>  
>> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
>> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>>  
>>  /* RTAS ibm,get-system-parameter token values */
>>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
>> @@ -894,4 +896,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>>  
>>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>>  hwaddr spapr_get_rtas_addr(void);
>> +void spapr_fwnmi_register(void);
>>  #endif /* HW_SPAPR_H */
>>
> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Greg Kurz 6 years, 7 months ago
On Tue, 25 Jun 2019 11:46:06 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> On Monday 24 June 2019 07:59 PM, Greg Kurz wrote:
> > On Wed, 12 Jun 2019 14:51:38 +0530
> > Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> >   
> >> This patch adds support in QEMU to handle "ibm,nmi-register"
> >> and "ibm,nmi-interlock" RTAS calls and sets the default
> >> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> >> type 4.0.
> >>  
> > 
> > Next machine type is 4.1.  
> 
> ok.
> 
> >   
> >> The machine check notification address is saved when the
> >> OS issues "ibm,nmi-register" RTAS call.
> >>
> >> This patch also handles the case when multiple processors
> >> experience machine check at or about the same time by
> >> handling "ibm,nmi-interlock" call. In such cases, as per
> >> PAPR, subsequent processors serialize waiting for the first
> >> processor to issue the "ibm,nmi-interlock" call. The second
> >> processor that also received a machine check error waits
> >> till the first processor is done reading the error log.
> >> The first processor issues "ibm,nmi-interlock" call
> >> when the error log is consumed.
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr.c         |    6 ++++-
> >>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    5 +++-
> >>  3 files changed, 72 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >> index 3d6d139..213d493 100644
> >> --- a/hw/ppc/spapr.c
> >> +++ b/hw/ppc/spapr.c
> >> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
> >>          /* Create the error string for live migration blocker */
> >>          error_setg(&spapr->fwnmi_migration_blocker,
> >>                  "Live migration not supported during machine check handling");
> >> +
> >> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> >> +        spapr_fwnmi_register();  
> > 
> > IIRC this was supposed to depend on SPAPR_CAP_FWNMI_MCE being ON.  
> 
> Yes this is inside SPAPR_CAP_FWNMI_MCE check:
> 
> if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
>     /*
>      * Ensure that the rtas image size is less than RTAS_ERROR_LOG_OFFSET
>      * or else the rtas image will be overwritten with the rtas error log
>      * when a machine check exception is encountered.
>      */
>     g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
> 
>     /* Resize rtas blob to accommodate error log */
>     spapr->rtas_size = RTAS_ERROR_LOG_MAX;
> 
>     /* Create the error string for live migration blocker */
>     error_setg(&spapr->fwnmi_migration_blocker,
>             "Live migration not supported during machine check handling");
> 
>     /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>     spapr_fwnmi_register();
> }
> 

Oops my bad... sorry for the noise.

> 
> >   
> >>      }
> >>  
> >>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> >>      spapr_caps_add_properties(smc, &error_abort);
> >>      smc->irq = &spapr_irq_dual;
> >>      smc->dr_phb_enabled = true;
> >> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
> >>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
> >>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> >>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> >> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;  
> > 
> > This should have been put into spapr_machine_4_0_class_options().  
> 
> ok. I will change it.
> 
> > 
> > But unless you manage to get this merged before soft-freeze (2019-07-02),
> > I'm afraid this will be a 4.2 feature.  
> 
> If there are no other comments, can this be merged to 4.1? I will send a
> revised version with the above changes.
> 

This is David's call.

> Regards,
> Aravinda
> 
> >   
> >>  }
> >>  
> >>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
> >> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
> >> index a015a80..e010cb2 100644
> >> --- a/hw/ppc/spapr_rtas.c
> >> +++ b/hw/ppc/spapr_rtas.c
> >> @@ -49,6 +49,7 @@
> >>  #include "hw/ppc/fdt.h"
> >>  #include "target/ppc/mmu-hash64.h"
> >>  #include "target/ppc/mmu-book3s-v3.h"
> >> +#include "migration/blocker.h"
> >>  
> >>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>                                     uint32_t token, uint32_t nargs,
> >> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
> >>      rtas_st(rets, 1, 100);
> >>  }
> >>  
> >> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
> >> +                                  SpaprMachineState *spapr,
> >> +                                  uint32_t token, uint32_t nargs,
> >> +                                  target_ulong args,
> >> +                                  uint32_t nret, target_ulong rets)
> >> +{
> >> +    int ret;
> >> +    hwaddr rtas_addr = spapr_get_rtas_addr();
> >> +
> >> +    if (!rtas_addr) {
> >> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >> +        return;
> >> +    }
> >> +
> >> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
> >> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >> +        return;
> >> +    }
> >> +
> >> +    ret = kvmppc_fwnmi_enable(cpu);
> >> +    if (ret == 1) {
> >> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
> >> +        return;
> >> +    } else if (ret < 0) {
> >> +        error_report("Couldn't enable KVM FWNMI capability");
> >> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
> >> +        return;
> >> +    }
> >> +
> >> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
> >> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >> +}
> >> +
> >> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
> >> +                                   SpaprMachineState *spapr,
> >> +                                   uint32_t token, uint32_t nargs,
> >> +                                   target_ulong args,
> >> +                                   uint32_t nret, target_ulong rets)
> >> +{
> >> +    if (spapr->guest_machine_check_addr == -1) {
> >> +        /* NMI register not called */
> >> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
> >> +    } else {
> >> +        /*
> >> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
> >> +         * hence unset mc_status.
> >> +         */
> >> +        spapr->mc_status = -1;
> >> +        qemu_cond_signal(&spapr->mc_delivery_cond);
> >> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
> >> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
> >> +    }
> >> +}
> >> +
> >>  static struct rtas_call {
> >>      const char *name;
> >>      spapr_rtas_fn fn;
> >> @@ -496,6 +551,14 @@ hwaddr spapr_get_rtas_addr(void)
> >>      return (hwaddr)fdt32_to_cpu(*rtas_data);
> >>  }
> >>  
> >> +void spapr_fwnmi_register(void)
> >> +{
> >> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
> >> +                        rtas_ibm_nmi_register);
> >> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
> >> +                        rtas_ibm_nmi_interlock);
> >> +}
> >> +
> >>  static void core_rtas_register_types(void)
> >>  {
> >>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
> >> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> >> index 0dedf0a..7ae53e2 100644
> >> --- a/include/hw/ppc/spapr.h
> >> +++ b/include/hw/ppc/spapr.h
> >> @@ -637,8 +637,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
> >>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
> >>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
> >>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
> >> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
> >> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
> >>  
> >> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
> >> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
> >>  
> >>  /* RTAS ibm,get-system-parameter token values */
> >>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
> >> @@ -894,4 +896,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
> >>  
> >>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
> >>  hwaddr spapr_get_rtas_addr(void);
> >> +void spapr_fwnmi_register(void);
> >>  #endif /* HW_SPAPR_H */
> >>  
> >   
> 


Re: [Qemu-devel] [Qemu-ppc] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by Aravinda Prasad 6 years, 7 months ago

On Tuesday 25 June 2019 12:30 PM, Greg Kurz wrote:
> On Tue, 25 Jun 2019 11:46:06 +0530
> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> 
>> On Monday 24 June 2019 07:59 PM, Greg Kurz wrote:
>>> On Wed, 12 Jun 2019 14:51:38 +0530
>>> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
>>>   
>>>> This patch adds support in QEMU to handle "ibm,nmi-register"
>>>> and "ibm,nmi-interlock" RTAS calls and sets the default
>>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
>>>> type 4.0.
>>>>  
>>>
>>> Next machine type is 4.1.  
>>
>> ok.
>>
>>>   
>>>> The machine check notification address is saved when the
>>>> OS issues "ibm,nmi-register" RTAS call.
>>>>
>>>> This patch also handles the case when multiple processors
>>>> experience machine check at or about the same time by
>>>> handling "ibm,nmi-interlock" call. In such cases, as per
>>>> PAPR, subsequent processors serialize waiting for the first
>>>> processor to issue the "ibm,nmi-interlock" call. The second
>>>> processor that also received a machine check error waits
>>>> till the first processor is done reading the error log.
>>>> The first processor issues "ibm,nmi-interlock" call
>>>> when the error log is consumed.
>>>>
>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>> ---
>>>>  hw/ppc/spapr.c         |    6 ++++-
>>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
>>>>  include/hw/ppc/spapr.h |    5 +++-
>>>>  3 files changed, 72 insertions(+), 2 deletions(-)
>>>>
>>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
>>>> index 3d6d139..213d493 100644
>>>> --- a/hw/ppc/spapr.c
>>>> +++ b/hw/ppc/spapr.c
>>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
>>>>          /* Create the error string for live migration blocker */
>>>>          error_setg(&spapr->fwnmi_migration_blocker,
>>>>                  "Live migration not supported during machine check handling");
>>>> +
>>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>>>> +        spapr_fwnmi_register();  
>>>
>>> IIRC this was supposed to depend on SPAPR_CAP_FWNMI_MCE being ON.  
>>
>> Yes this is inside SPAPR_CAP_FWNMI_MCE check:
>>
>> if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
>>     /*
>>      * Ensure that the rtas image size is less than RTAS_ERROR_LOG_OFFSET
>>      * or else the rtas image will be overwritten with the rtas error log
>>      * when a machine check exception is encountered.
>>      */
>>     g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
>>
>>     /* Resize rtas blob to accommodate error log */
>>     spapr->rtas_size = RTAS_ERROR_LOG_MAX;
>>
>>     /* Create the error string for live migration blocker */
>>     error_setg(&spapr->fwnmi_migration_blocker,
>>             "Live migration not supported during machine check handling");
>>
>>     /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
>>     spapr_fwnmi_register();
>> }
>>
> 
> Oops my bad... sorry for the noise.
> 
>>
>>>   
>>>>      }
>>>>  
>>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
>>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
>>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
>>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
>>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
>>>>      spapr_caps_add_properties(smc, &error_abort);
>>>>      smc->irq = &spapr_irq_dual;
>>>>      smc->dr_phb_enabled = true;
>>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
>>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
>>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
>>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
>>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;  
>>>
>>> This should have been put into spapr_machine_4_0_class_options().  
>>
>> ok. I will change it.
>>
>>>
>>> But unless you manage to get this merged before soft-freeze (2019-07-02),
>>> I'm afraid this will be a 4.2 feature.  
>>
>> If there are no other comments, can this be merged to 4.1? I will send a
>> revised version with the above changes.
>>
> 
> This is David's call.

David, can you let me know if this can be merged to 4.1 with the above
minor changes?

> 
>> Regards,
>> Aravinda
>>
>>>   
>>>>  }
>>>>  
>>>>  DEFINE_SPAPR_MACHINE(3_1, "3.1", false);
>>>> diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
>>>> index a015a80..e010cb2 100644
>>>> --- a/hw/ppc/spapr_rtas.c
>>>> +++ b/hw/ppc/spapr_rtas.c
>>>> @@ -49,6 +49,7 @@
>>>>  #include "hw/ppc/fdt.h"
>>>>  #include "target/ppc/mmu-hash64.h"
>>>>  #include "target/ppc/mmu-book3s-v3.h"
>>>> +#include "migration/blocker.h"
>>>>  
>>>>  static void rtas_display_character(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>>                                     uint32_t token, uint32_t nargs,
>>>> @@ -352,6 +353,60 @@ static void rtas_get_power_level(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>>>      rtas_st(rets, 1, 100);
>>>>  }
>>>>  
>>>> +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
>>>> +                                  SpaprMachineState *spapr,
>>>> +                                  uint32_t token, uint32_t nargs,
>>>> +                                  target_ulong args,
>>>> +                                  uint32_t nret, target_ulong rets)
>>>> +{
>>>> +    int ret;
>>>> +    hwaddr rtas_addr = spapr_get_rtas_addr();
>>>> +
>>>> +    if (!rtas_addr) {
>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_OFF) {
>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    ret = kvmppc_fwnmi_enable(cpu);
>>>> +    if (ret == 1) {
>>>> +        rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
>>>> +        return;
>>>> +    } else if (ret < 0) {
>>>> +        error_report("Couldn't enable KVM FWNMI capability");
>>>> +        rtas_st(rets, 0, RTAS_OUT_HW_ERROR);
>>>> +        return;
>>>> +    }
>>>> +
>>>> +    spapr->guest_machine_check_addr = rtas_ld(args, 1);
>>>> +    rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>>> +}
>>>> +
>>>> +static void rtas_ibm_nmi_interlock(PowerPCCPU *cpu,
>>>> +                                   SpaprMachineState *spapr,
>>>> +                                   uint32_t token, uint32_t nargs,
>>>> +                                   target_ulong args,
>>>> +                                   uint32_t nret, target_ulong rets)
>>>> +{
>>>> +    if (spapr->guest_machine_check_addr == -1) {
>>>> +        /* NMI register not called */
>>>> +        rtas_st(rets, 0, RTAS_OUT_PARAM_ERROR);
>>>> +    } else {
>>>> +        /*
>>>> +         * vCPU issuing "ibm,nmi-interlock" is done with NMI handling,
>>>> +         * hence unset mc_status.
>>>> +         */
>>>> +        spapr->mc_status = -1;
>>>> +        qemu_cond_signal(&spapr->mc_delivery_cond);
>>>> +        migrate_del_blocker(spapr->fwnmi_migration_blocker);
>>>> +        rtas_st(rets, 0, RTAS_OUT_SUCCESS);
>>>> +    }
>>>> +}
>>>> +
>>>>  static struct rtas_call {
>>>>      const char *name;
>>>>      spapr_rtas_fn fn;
>>>> @@ -496,6 +551,14 @@ hwaddr spapr_get_rtas_addr(void)
>>>>      return (hwaddr)fdt32_to_cpu(*rtas_data);
>>>>  }
>>>>  
>>>> +void spapr_fwnmi_register(void)
>>>> +{
>>>> +    spapr_rtas_register(RTAS_IBM_NMI_REGISTER, "ibm,nmi-register",
>>>> +                        rtas_ibm_nmi_register);
>>>> +    spapr_rtas_register(RTAS_IBM_NMI_INTERLOCK, "ibm,nmi-interlock",
>>>> +                        rtas_ibm_nmi_interlock);
>>>> +}
>>>> +
>>>>  static void core_rtas_register_types(void)
>>>>  {
>>>>      spapr_rtas_register(RTAS_DISPLAY_CHARACTER, "display-character",
>>>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>>>> index 0dedf0a..7ae53e2 100644
>>>> --- a/include/hw/ppc/spapr.h
>>>> +++ b/include/hw/ppc/spapr.h
>>>> @@ -637,8 +637,10 @@ target_ulong spapr_hypercall(PowerPCCPU *cpu, target_ulong opcode,
>>>>  #define RTAS_IBM_CREATE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x27)
>>>>  #define RTAS_IBM_REMOVE_PE_DMA_WINDOW           (RTAS_TOKEN_BASE + 0x28)
>>>>  #define RTAS_IBM_RESET_PE_DMA_WINDOW            (RTAS_TOKEN_BASE + 0x29)
>>>> +#define RTAS_IBM_NMI_REGISTER                   (RTAS_TOKEN_BASE + 0x2A)
>>>> +#define RTAS_IBM_NMI_INTERLOCK                  (RTAS_TOKEN_BASE + 0x2B)
>>>>  
>>>> -#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2A)
>>>> +#define RTAS_TOKEN_MAX                          (RTAS_TOKEN_BASE + 0x2C)
>>>>  
>>>>  /* RTAS ibm,get-system-parameter token values */
>>>>  #define RTAS_SYSPARM_SPLPAR_CHARACTERISTICS      20
>>>> @@ -894,4 +896,5 @@ void spapr_check_pagesize(SpaprMachineState *spapr, hwaddr pagesize,
>>>>  
>>>>  void spapr_set_all_lpcrs(target_ulong value, target_ulong mask);
>>>>  hwaddr spapr_get_rtas_addr(void);
>>>> +void spapr_fwnmi_register(void);
>>>>  #endif /* HW_SPAPR_H */
>>>>  
>>>   
>>
> 
> 

-- 
Regards,
Aravinda


Re: [Qemu-devel] [Qemu-ppc] [PATCH v10 6/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
Posted by David Gibson 6 years, 7 months ago
On Wed, Jun 26, 2019 at 10:43:33AM +0530, Aravinda Prasad wrote:
> 
> 
> On Tuesday 25 June 2019 12:30 PM, Greg Kurz wrote:
> > On Tue, 25 Jun 2019 11:46:06 +0530
> > Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> > 
> >> On Monday 24 June 2019 07:59 PM, Greg Kurz wrote:
> >>> On Wed, 12 Jun 2019 14:51:38 +0530
> >>> Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:
> >>>   
> >>>> This patch adds support in QEMU to handle "ibm,nmi-register"
> >>>> and "ibm,nmi-interlock" RTAS calls and sets the default
> >>>> value of SPAPR_CAP_FWNMI_MCE to SPAPR_CAP_ON for machine
> >>>> type 4.0.
> >>>>  
> >>>
> >>> Next machine type is 4.1.  
> >>
> >> ok.
> >>
> >>>   
> >>>> The machine check notification address is saved when the
> >>>> OS issues "ibm,nmi-register" RTAS call.
> >>>>
> >>>> This patch also handles the case when multiple processors
> >>>> experience machine check at or about the same time by
> >>>> handling "ibm,nmi-interlock" call. In such cases, as per
> >>>> PAPR, subsequent processors serialize waiting for the first
> >>>> processor to issue the "ibm,nmi-interlock" call. The second
> >>>> processor that also received a machine check error waits
> >>>> till the first processor is done reading the error log.
> >>>> The first processor issues "ibm,nmi-interlock" call
> >>>> when the error log is consumed.
> >>>>
> >>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>> ---
> >>>>  hw/ppc/spapr.c         |    6 ++++-
> >>>>  hw/ppc/spapr_rtas.c    |   63 ++++++++++++++++++++++++++++++++++++++++++++++++
> >>>>  include/hw/ppc/spapr.h |    5 +++-
> >>>>  3 files changed, 72 insertions(+), 2 deletions(-)
> >>>>
> >>>> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> >>>> index 3d6d139..213d493 100644
> >>>> --- a/hw/ppc/spapr.c
> >>>> +++ b/hw/ppc/spapr.c
> >>>> @@ -2946,6 +2946,9 @@ static void spapr_machine_init(MachineState *machine)
> >>>>          /* Create the error string for live migration blocker */
> >>>>          error_setg(&spapr->fwnmi_migration_blocker,
> >>>>                  "Live migration not supported during machine check handling");
> >>>> +
> >>>> +        /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> >>>> +        spapr_fwnmi_register();  
> >>>
> >>> IIRC this was supposed to depend on SPAPR_CAP_FWNMI_MCE being ON.  
> >>
> >> Yes this is inside SPAPR_CAP_FWNMI_MCE check:
> >>
> >> if (spapr_get_cap(spapr, SPAPR_CAP_FWNMI_MCE) == SPAPR_CAP_ON) {
> >>     /*
> >>      * Ensure that the rtas image size is less than RTAS_ERROR_LOG_OFFSET
> >>      * or else the rtas image will be overwritten with the rtas error log
> >>      * when a machine check exception is encountered.
> >>      */
> >>     g_assert(spapr->rtas_size < RTAS_ERROR_LOG_OFFSET);
> >>
> >>     /* Resize rtas blob to accommodate error log */
> >>     spapr->rtas_size = RTAS_ERROR_LOG_MAX;
> >>
> >>     /* Create the error string for live migration blocker */
> >>     error_setg(&spapr->fwnmi_migration_blocker,
> >>             "Live migration not supported during machine check handling");
> >>
> >>     /* Register ibm,nmi-register and ibm,nmi-interlock RTAS calls */
> >>     spapr_fwnmi_register();
> >> }
> >>
> > 
> > Oops my bad... sorry for the noise.
> > 
> >>
> >>>   
> >>>>      }
> >>>>  
> >>>>      spapr->rtas_blob = g_malloc(spapr->rtas_size);
> >>>> @@ -4408,7 +4411,7 @@ static void spapr_machine_class_init(ObjectClass *oc, void *data)
> >>>>      smc->default_caps.caps[SPAPR_CAP_NESTED_KVM_HV] = SPAPR_CAP_OFF;
> >>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_ON;
> >>>>      smc->default_caps.caps[SPAPR_CAP_CCF_ASSIST] = SPAPR_CAP_OFF;
> >>>> -    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;
> >>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_ON;
> >>>>      spapr_caps_add_properties(smc, &error_abort);
> >>>>      smc->irq = &spapr_irq_dual;
> >>>>      smc->dr_phb_enabled = true;
> >>>> @@ -4512,6 +4515,7 @@ static void spapr_machine_3_1_class_options(MachineClass *mc)
> >>>>      smc->default_caps.caps[SPAPR_CAP_SBBC] = SPAPR_CAP_BROKEN;
> >>>>      smc->default_caps.caps[SPAPR_CAP_IBS] = SPAPR_CAP_BROKEN;
> >>>>      smc->default_caps.caps[SPAPR_CAP_LARGE_DECREMENTER] = SPAPR_CAP_OFF;
> >>>> +    smc->default_caps.caps[SPAPR_CAP_FWNMI_MCE] = SPAPR_CAP_OFF;  
> >>>
> >>> This should have been put into spapr_machine_4_0_class_options().  
> >>
> >> ok. I will change it.
> >>
> >>>
> >>> But unless you manage to get this merged before soft-freeze (2019-07-02),
> >>> I'm afraid this will be a 4.2 feature.  
> >>
> >> If there are no other comments, can this be merged to 4.1? I will send a
> >> revised version with the above changes.
> >>
> > 
> > This is David's call.
> 
> David, can you let me know if this can be merged to 4.1 with the above
> minor changes?

Nope, sorry.  I've been away, but also this still needs more polish.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson