[v7] target-ppc/spapr: Add FWNMI support in QEMU for PowerKVM guests

[Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by Aravinda Prasad 6 years, 10 months ago

Memory error such as bit flips that cannot be corrected
by hardware are passed on to the kernel for handling.
If the memory address in error belongs to guest then
the guest kernel is responsible for taking suitable action.
Patch [1] enhances KVM to exit guest with exit reason
set to KVM_EXIT_NMI in such cases. This patch handles
KVM_EXIT_NMI exit.

[1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
    (e20bbd3d and related commits)

Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
---
 hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
 include/hw/ppc/spapr.h |    1 +
 target/ppc/kvm.c       |   16 ++++++++++++++++
 target/ppc/kvm_ppc.h   |    2 ++
 4 files changed, 41 insertions(+)

diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
index ae0f093..e7a24ad 100644
--- a/hw/ppc/spapr_events.c
+++ b/hw/ppc/spapr_events.c
@@ -620,6 +620,28 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
                             RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
 }
 
+void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
+{
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
+
+    while (spapr->mc_status != -1) {
+        /*
+         * Check whether the same CPU got machine check error
+         * while still handling the mc error (i.e., before
+         * that CPU called "ibm,nmi-interlock"
+         */
+        if (spapr->mc_status == cpu->vcpu_id) {
+            qemu_system_guest_panicked(NULL);
+        }
+        qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
+        /* If the system is reset meanwhile, then just return */
+        if (spapr->mc_reset) {
+            return;
+        }
+    }
+    spapr->mc_status = cpu->vcpu_id;
+}
+
 static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
                             uint32_t token, uint32_t nargs,
                             target_ulong args,
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index ee5589d..b0d8c18 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -792,6 +792,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
                           Error **errp);
 void spapr_clear_pending_events(SpaprMachineState *spapr);
 int spapr_max_server_number(SpaprMachineState *spapr);
+void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
 
 /* DRC callbacks. */
 void spapr_core_release(DeviceState *dev);
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 2427c8e..a593448 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -1738,6 +1738,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         ret = 0;
         break;
 
+    case KVM_EXIT_NMI:
+        DPRINTF("handle NMI exception\n");
+        ret = kvm_handle_nmi(cpu, run);
+        break;
+
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
@@ -2803,6 +2808,17 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
     return data & 0xffff;
 }
 
+int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
+{
+    bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
+
+    cpu_synchronize_state(CPU(cpu));
+
+    spapr_mce_req_event(cpu, recovered);
+
+    return 0;
+}
+
 int kvmppc_enable_hwrng(void)
 {
     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
index 2c2ea30..df5e85f 100644
--- a/target/ppc/kvm_ppc.h
+++ b/target/ppc/kvm_ppc.h
@@ -80,6 +80,8 @@ bool kvmppc_hpt_needs_host_contiguous_pages(void);
 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp);
 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
 
+int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
+
 #else
 
 static inline uint32_t kvmppc_get_tbfreq(void)

Re: [Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by David Gibson 6 years, 10 months ago

On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:
> Memory error such as bit flips that cannot be corrected
> by hardware are passed on to the kernel for handling.
> If the memory address in error belongs to guest then
> the guest kernel is responsible for taking suitable action.
> Patch [1] enhances KVM to exit guest with exit reason
> set to KVM_EXIT_NMI in such cases. This patch handles
> KVM_EXIT_NMI exit.
> 
> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
>     (e20bbd3d and related commits)
> 
> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> ---
>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
>  include/hw/ppc/spapr.h |    1 +
>  target/ppc/kvm.c       |   16 ++++++++++++++++
>  target/ppc/kvm_ppc.h   |    2 ++
>  4 files changed, 41 insertions(+)
> 
> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> index ae0f093..e7a24ad 100644
> --- a/hw/ppc/spapr_events.c
> +++ b/hw/ppc/spapr_events.c
> @@ -620,6 +620,28 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>  }
>  
> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> +{
> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> +
> +    while (spapr->mc_status != -1) {
> +        /*
> +         * Check whether the same CPU got machine check error
> +         * while still handling the mc error (i.e., before
> +         * that CPU called "ibm,nmi-interlock"
> +         */
> +        if (spapr->mc_status == cpu->vcpu_id) {
> +            qemu_system_guest_panicked(NULL);
> +        }
> +        qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
> +        /* If the system is reset meanwhile, then just return */
> +        if (spapr->mc_reset) {

I don't really see what this accomplishes.  IIUC mc_reset is true from
reset time until nmi-register is called.  Which means you could just
check for guest_mnachine_check_addre being unset - in which case don't
you need to fallback to the old machine check behaviour anyway?

> +            return;
> +        }
> +    }
> +    spapr->mc_status = cpu->vcpu_id;
> +}
> +
>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
>                              uint32_t token, uint32_t nargs,
>                              target_ulong args,
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index ee5589d..b0d8c18 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -792,6 +792,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>                            Error **errp);
>  void spapr_clear_pending_events(SpaprMachineState *spapr);
>  int spapr_max_server_number(SpaprMachineState *spapr);
> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
>  
>  /* DRC callbacks. */
>  void spapr_core_release(DeviceState *dev);
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 2427c8e..a593448 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -1738,6 +1738,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>          ret = 0;
>          break;
>  
> +    case KVM_EXIT_NMI:
> +        DPRINTF("handle NMI exception\n");

tracepoints are generally preferred to new DPRINTFs.

> +        ret = kvm_handle_nmi(cpu, run);
> +        break;
> +
>      default:
>          fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
>          ret = -1;
> @@ -2803,6 +2808,17 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
>      return data & 0xffff;
>  }
>  
> +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
> +{
> +    bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
> +
> +    cpu_synchronize_state(CPU(cpu));
> +
> +    spapr_mce_req_event(cpu, recovered);
> +
> +    return 0;
> +}
> +
>  int kvmppc_enable_hwrng(void)
>  {
>      if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> index 2c2ea30..df5e85f 100644
> --- a/target/ppc/kvm_ppc.h
> +++ b/target/ppc/kvm_ppc.h
> @@ -80,6 +80,8 @@ bool kvmppc_hpt_needs_host_contiguous_pages(void);
>  void kvm_check_mmu(PowerPCCPU *cpu, Error **errp);
>  void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
>  
> +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
> +
>  #else
>  
>  static inline uint32_t kvmppc_get_tbfreq(void)
> 

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by Aravinda Prasad 6 years, 10 months ago


On Monday 25 March 2019 11:52 AM, David Gibson wrote:
> On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:
>> Memory error such as bit flips that cannot be corrected
>> by hardware are passed on to the kernel for handling.
>> If the memory address in error belongs to guest then
>> the guest kernel is responsible for taking suitable action.
>> Patch [1] enhances KVM to exit guest with exit reason
>> set to KVM_EXIT_NMI in such cases. This patch handles
>> KVM_EXIT_NMI exit.
>>
>> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
>>     (e20bbd3d and related commits)
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    1 +
>>  target/ppc/kvm.c       |   16 ++++++++++++++++
>>  target/ppc/kvm_ppc.h   |    2 ++
>>  4 files changed, 41 insertions(+)
>>
>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>> index ae0f093..e7a24ad 100644
>> --- a/hw/ppc/spapr_events.c
>> +++ b/hw/ppc/spapr_events.c
>> @@ -620,6 +620,28 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>>  }
>>  
>> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>> +{
>> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>> +
>> +    while (spapr->mc_status != -1) {
>> +        /*
>> +         * Check whether the same CPU got machine check error
>> +         * while still handling the mc error (i.e., before
>> +         * that CPU called "ibm,nmi-interlock"
>> +         */
>> +        if (spapr->mc_status == cpu->vcpu_id) {
>> +            qemu_system_guest_panicked(NULL);
>> +        }
>> +        qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
>> +        /* If the system is reset meanwhile, then just return */
>> +        if (spapr->mc_reset) {
> 
> I don't really see what this accomplishes.  IIUC mc_reset is true from
> reset time until nmi-register is called.  Which means you could just
> check for guest_mnachine_check_addre being unset - in which case don't
> you need to fallback to the old machine check behaviour anyway?

We can check for guest_machine_check_addr being unset instead of mc_reset.

Do we need any kind of memory barriers to ensure that this thread reads
the updated guest_machine_check_addr/mc_reset?

We don't have to fallback to the old machine check behavior, because
guest_machine_check_addr is unset only during system reset.

> 
>> +            return;
>> +        }
>> +    }
>> +    spapr->mc_status = cpu->vcpu_id;
>> +}
>> +
>>  static void check_exception(PowerPCCPU *cpu, SpaprMachineState *spapr,
>>                              uint32_t token, uint32_t nargs,
>>                              target_ulong args,
>> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
>> index ee5589d..b0d8c18 100644
>> --- a/include/hw/ppc/spapr.h
>> +++ b/include/hw/ppc/spapr.h
>> @@ -792,6 +792,7 @@ void spapr_reallocate_hpt(SpaprMachineState *spapr, int shift,
>>                            Error **errp);
>>  void spapr_clear_pending_events(SpaprMachineState *spapr);
>>  int spapr_max_server_number(SpaprMachineState *spapr);
>> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered);
>>  
>>  /* DRC callbacks. */
>>  void spapr_core_release(DeviceState *dev);
>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>> index 2427c8e..a593448 100644
>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -1738,6 +1738,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>>          ret = 0;
>>          break;
>>  
>> +    case KVM_EXIT_NMI:
>> +        DPRINTF("handle NMI exception\n");
> 
> tracepoints are generally preferred to new DPRINTFs.

sure.

> 
>> +        ret = kvm_handle_nmi(cpu, run);
>> +        break;
>> +
>>      default:
>>          fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
>>          ret = -1;
>> @@ -2803,6 +2808,17 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
>>      return data & 0xffff;
>>  }
>>  
>> +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
>> +{
>> +    bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
>> +
>> +    cpu_synchronize_state(CPU(cpu));
>> +
>> +    spapr_mce_req_event(cpu, recovered);
>> +
>> +    return 0;
>> +}
>> +
>>  int kvmppc_enable_hwrng(void)
>>  {
>>      if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
>> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
>> index 2c2ea30..df5e85f 100644
>> --- a/target/ppc/kvm_ppc.h
>> +++ b/target/ppc/kvm_ppc.h
>> @@ -80,6 +80,8 @@ bool kvmppc_hpt_needs_host_contiguous_pages(void);
>>  void kvm_check_mmu(PowerPCCPU *cpu, Error **errp);
>>  void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
>>  
>> +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
>> +
>>  #else
>>  
>>  static inline uint32_t kvmppc_get_tbfreq(void)
>>
> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by David Gibson 6 years, 10 months ago

On Mon, Mar 25, 2019 at 01:31:12PM +0530, Aravinda Prasad wrote:
> 
> 
> On Monday 25 March 2019 11:52 AM, David Gibson wrote:
> > On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:
> >> Memory error such as bit flips that cannot be corrected
> >> by hardware are passed on to the kernel for handling.
> >> If the memory address in error belongs to guest then
> >> the guest kernel is responsible for taking suitable action.
> >> Patch [1] enhances KVM to exit guest with exit reason
> >> set to KVM_EXIT_NMI in such cases. This patch handles
> >> KVM_EXIT_NMI exit.
> >>
> >> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
> >>     (e20bbd3d and related commits)
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    1 +
> >>  target/ppc/kvm.c       |   16 ++++++++++++++++
> >>  target/ppc/kvm_ppc.h   |    2 ++
> >>  4 files changed, 41 insertions(+)
> >>
> >> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
> >> index ae0f093..e7a24ad 100644
> >> --- a/hw/ppc/spapr_events.c
> >> +++ b/hw/ppc/spapr_events.c
> >> @@ -620,6 +620,28 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
> >>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
> >>  }
> >>  
> >> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
> >> +{
> >> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >> +
> >> +    while (spapr->mc_status != -1) {
> >> +        /*
> >> +         * Check whether the same CPU got machine check error
> >> +         * while still handling the mc error (i.e., before
> >> +         * that CPU called "ibm,nmi-interlock"
> >> +         */
> >> +        if (spapr->mc_status == cpu->vcpu_id) {
> >> +            qemu_system_guest_panicked(NULL);
> >> +        }
> >> +        qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
> >> +        /* If the system is reset meanwhile, then just return */
> >> +        if (spapr->mc_reset) {
> > 
> > I don't really see what this accomplishes.  IIUC mc_reset is true from
> > reset time until nmi-register is called.  Which means you could just
> > check for guest_mnachine_check_addre being unset - in which case don't
> > you need to fallback to the old machine check behaviour anyway?
> 
> We can check for guest_machine_check_addr being unset instead of mc_reset.
> 
> Do we need any kind of memory barriers to ensure that this thread reads
> the updated guest_machine_check_addr/mc_reset?

IIUC these variables are all protected by the global qemu mutex, so
that should include the necessary memory barriers already.

> We don't have to fallback to the old machine check behavior, because
> guest_machine_check_addr is unset only during system reset.

Yes... which means that between reset and re-registering, we should be
using the old machine check behaviour, yes?  Which is exactly this
situation.

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by Aravinda Prasad 6 years, 10 months ago


On Tuesday 26 March 2019 05:01 AM, David Gibson wrote:
> On Mon, Mar 25, 2019 at 01:31:12PM +0530, Aravinda Prasad wrote:
>>
>>
>> On Monday 25 March 2019 11:52 AM, David Gibson wrote:
>>> On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:
>>>> Memory error such as bit flips that cannot be corrected
>>>> by hardware are passed on to the kernel for handling.
>>>> If the memory address in error belongs to guest then
>>>> the guest kernel is responsible for taking suitable action.
>>>> Patch [1] enhances KVM to exit guest with exit reason
>>>> set to KVM_EXIT_NMI in such cases. This patch handles
>>>> KVM_EXIT_NMI exit.
>>>>
>>>> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
>>>>     (e20bbd3d and related commits)
>>>>
>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>> ---
>>>>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
>>>>  include/hw/ppc/spapr.h |    1 +
>>>>  target/ppc/kvm.c       |   16 ++++++++++++++++
>>>>  target/ppc/kvm_ppc.h   |    2 ++
>>>>  4 files changed, 41 insertions(+)
>>>>
>>>> diff --git a/hw/ppc/spapr_events.c b/hw/ppc/spapr_events.c
>>>> index ae0f093..e7a24ad 100644
>>>> --- a/hw/ppc/spapr_events.c
>>>> +++ b/hw/ppc/spapr_events.c
>>>> @@ -620,6 +620,28 @@ void spapr_hotplug_req_remove_by_count_indexed(SpaprDrcType drc_type,
>>>>                              RTAS_LOG_V6_HP_ACTION_REMOVE, drc_type, &drc_id);
>>>>  }
>>>>  
>>>> +void spapr_mce_req_event(PowerPCCPU *cpu, bool recovered)
>>>> +{
>>>> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>>>> +
>>>> +    while (spapr->mc_status != -1) {
>>>> +        /*
>>>> +         * Check whether the same CPU got machine check error
>>>> +         * while still handling the mc error (i.e., before
>>>> +         * that CPU called "ibm,nmi-interlock"
>>>> +         */
>>>> +        if (spapr->mc_status == cpu->vcpu_id) {
>>>> +            qemu_system_guest_panicked(NULL);
>>>> +        }
>>>> +        qemu_cond_wait_iothread(&spapr->mc_delivery_cond);
>>>> +        /* If the system is reset meanwhile, then just return */
>>>> +        if (spapr->mc_reset) {
>>>
>>> I don't really see what this accomplishes.  IIUC mc_reset is true from
>>> reset time until nmi-register is called.  Which means you could just
>>> check for guest_mnachine_check_addre being unset - in which case don't
>>> you need to fallback to the old machine check behaviour anyway?
>>
>> We can check for guest_machine_check_addr being unset instead of mc_reset.
>>
>> Do we need any kind of memory barriers to ensure that this thread reads
>> the updated guest_machine_check_addr/mc_reset?
> 
> IIUC these variables are all protected by the global qemu mutex, so
> that should include the necessary memory barriers already.

ok.

> 
>> We don't have to fallback to the old machine check behavior, because
>> guest_machine_check_addr is unset only during system reset.
> 
> Yes... which means that between reset and re-registering, we should be
> using the old machine check behaviour, yes?  Which is exactly this
> situation.

Yes..we should use the old behavior during that time window.

> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by Aravinda Prasad 6 years, 10 months ago


On Monday 25 March 2019 11:52 AM, David Gibson wrote:
> On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:
>> Memory error such as bit flips that cannot be corrected
>> by hardware are passed on to the kernel for handling.
>> If the memory address in error belongs to guest then
>> the guest kernel is responsible for taking suitable action.
>> Patch [1] enhances KVM to exit guest with exit reason
>> set to KVM_EXIT_NMI in such cases. This patch handles
>> KVM_EXIT_NMI exit.
>>
>> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
>>     (e20bbd3d and related commits)
>>
>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>> ---
>>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
>>  include/hw/ppc/spapr.h |    1 +
>>  target/ppc/kvm.c       |   16 ++++++++++++++++
>>  target/ppc/kvm_ppc.h   |    2 ++
>>  4 files changed, 41 insertions(+)
>>

[...]

>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>> index 2427c8e..a593448 100644
>> --- a/target/ppc/kvm.c
>> +++ b/target/ppc/kvm.c
>> @@ -1738,6 +1738,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>>          ret = 0;
>>          break;
>>  
>> +    case KVM_EXIT_NMI:
>> +        DPRINTF("handle NMI exception\n");
> 
> tracepoints are generally preferred to new DPRINTFs.

I see DPRINTFs used in all other exit reasons in this function. Do you
want me to change this particular exit case to tracepoints? I think it
is better to keep this DPRINTF as of now and change all the DPRINTFs to
tracepoints in a separate patch set.

Regards,
Aravinda

> 
>> +        ret = kvm_handle_nmi(cpu, run);
>> +        break;
>> +
>>      default:
>>          fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
>>          ret = -1;
>> @@ -2803,6 +2808,17 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
>>      return data & 0xffff;
>>  }
>>  
>> +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
>> +{
>> +    bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
>> +
>> +    cpu_synchronize_state(CPU(cpu));
>> +
>> +    spapr_mce_req_event(cpu, recovered);
>> +
>> +    return 0;
>> +}
>> +
>>  int kvmppc_enable_hwrng(void)
>>  {
>>      if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
>> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
>> index 2c2ea30..df5e85f 100644
>> --- a/target/ppc/kvm_ppc.h
>> +++ b/target/ppc/kvm_ppc.h
>> @@ -80,6 +80,8 @@ bool kvmppc_hpt_needs_host_contiguous_pages(void);
>>  void kvm_check_mmu(PowerPCCPU *cpu, Error **errp);
>>  void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
>>  
>> +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
>> +
>>  #else
>>  
>>  static inline uint32_t kvmppc_get_tbfreq(void)
>>
> 

-- 
Regards,
Aravinda

Re: [Qemu-devel] [Qemu-ppc] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by Greg Kurz 6 years, 10 months ago

On Thu, 4 Apr 2019 14:40:45 +0530
Aravinda Prasad <aravinda@linux.vnet.ibm.com> wrote:

> On Monday 25 March 2019 11:52 AM, David Gibson wrote:
> > On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:  
> >> Memory error such as bit flips that cannot be corrected
> >> by hardware are passed on to the kernel for handling.
> >> If the memory address in error belongs to guest then
> >> the guest kernel is responsible for taking suitable action.
> >> Patch [1] enhances KVM to exit guest with exit reason
> >> set to KVM_EXIT_NMI in such cases. This patch handles
> >> KVM_EXIT_NMI exit.
> >>
> >> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
> >>     (e20bbd3d and related commits)
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    1 +
> >>  target/ppc/kvm.c       |   16 ++++++++++++++++
> >>  target/ppc/kvm_ppc.h   |    2 ++
> >>  4 files changed, 41 insertions(+)
> >>  
> 
> [...]
> 
> >> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> >> index 2427c8e..a593448 100644
> >> --- a/target/ppc/kvm.c
> >> +++ b/target/ppc/kvm.c
> >> @@ -1738,6 +1738,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
> >>          ret = 0;
> >>          break;
> >>  
> >> +    case KVM_EXIT_NMI:
> >> +        DPRINTF("handle NMI exception\n");  
> > 
> > tracepoints are generally preferred to new DPRINTFs.  
> 
> I see DPRINTFs used in all other exit reasons in this function. Do you
> want me to change this particular exit case to tracepoints? I think it
> is better to keep this DPRINTF as of now and change all the DPRINTFs to
> tracepoints in a separate patch set.
> 

git grep shows that all DPRINTFs (25 of them) are in target/ppc/kvm.c. I guess
this can be addressed with a single, mechanical and easy to review preparatory
patch.

> Regards,
> Aravinda
> 
> >   
> >> +        ret = kvm_handle_nmi(cpu, run);
> >> +        break;
> >> +
> >>      default:
> >>          fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
> >>          ret = -1;
> >> @@ -2803,6 +2808,17 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
> >>      return data & 0xffff;
> >>  }
> >>  
> >> +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
> >> +{
> >> +    bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV;
> >> +
> >> +    cpu_synchronize_state(CPU(cpu));
> >> +
> >> +    spapr_mce_req_event(cpu, recovered);
> >> +
> >> +    return 0;
> >> +}
> >> +
> >>  int kvmppc_enable_hwrng(void)
> >>  {
> >>      if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
> >> diff --git a/target/ppc/kvm_ppc.h b/target/ppc/kvm_ppc.h
> >> index 2c2ea30..df5e85f 100644
> >> --- a/target/ppc/kvm_ppc.h
> >> +++ b/target/ppc/kvm_ppc.h
> >> @@ -80,6 +80,8 @@ bool kvmppc_hpt_needs_host_contiguous_pages(void);
> >>  void kvm_check_mmu(PowerPCCPU *cpu, Error **errp);
> >>  void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online);
> >>  
> >> +int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run);
> >> +
> >>  #else
> >>  
> >>  static inline uint32_t kvmppc_get_tbfreq(void)
> >>  
> >   
>

Re: [Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by David Gibson 6 years, 10 months ago

On Thu, Apr 04, 2019 at 02:40:45PM +0530, Aravinda Prasad wrote:
> 
> 
> On Monday 25 March 2019 11:52 AM, David Gibson wrote:
> > On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:
> >> Memory error such as bit flips that cannot be corrected
> >> by hardware are passed on to the kernel for handling.
> >> If the memory address in error belongs to guest then
> >> the guest kernel is responsible for taking suitable action.
> >> Patch [1] enhances KVM to exit guest with exit reason
> >> set to KVM_EXIT_NMI in such cases. This patch handles
> >> KVM_EXIT_NMI exit.
> >>
> >> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
> >>     (e20bbd3d and related commits)
> >>
> >> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >> ---
> >>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
> >>  include/hw/ppc/spapr.h |    1 +
> >>  target/ppc/kvm.c       |   16 ++++++++++++++++
> >>  target/ppc/kvm_ppc.h   |    2 ++
> >>  4 files changed, 41 insertions(+)
> >>
> 
> [...]
> 
> >> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> >> index 2427c8e..a593448 100644
> >> --- a/target/ppc/kvm.c
> >> +++ b/target/ppc/kvm.c
> >> @@ -1738,6 +1738,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
> >>          ret = 0;
> >>          break;
> >>  
> >> +    case KVM_EXIT_NMI:
> >> +        DPRINTF("handle NMI exception\n");
> > 
> > tracepoints are generally preferred to new DPRINTFs.
> 
> I see DPRINTFs used in all other exit reasons in this function. Do you
> want me to change this particular exit case to tracepoints? I think it
> is better to keep this DPRINTF as of now and change all the DPRINTFs to
> tracepoints in a separate patch set.

Ah, good point.  Tracepoints are generally preferred, but since
DPRINTFs are in use here, stick with that (at some point it would be
good to change the whole file, but that's out of scope here).

-- 
David Gibson			| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au	| minimalist, thank you.  NOT _the_ _other_
				| _way_ _around_!
http://www.ozlabs.org/~dgibson

Re: [Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by Alexey Kardashevskiy 6 years, 10 months ago


On 05/04/2019 10:17, David Gibson wrote:
> On Thu, Apr 04, 2019 at 02:40:45PM +0530, Aravinda Prasad wrote:
>>
>>
>> On Monday 25 March 2019 11:52 AM, David Gibson wrote:
>>> On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:
>>>> Memory error such as bit flips that cannot be corrected
>>>> by hardware are passed on to the kernel for handling.
>>>> If the memory address in error belongs to guest then
>>>> the guest kernel is responsible for taking suitable action.
>>>> Patch [1] enhances KVM to exit guest with exit reason
>>>> set to KVM_EXIT_NMI in such cases. This patch handles
>>>> KVM_EXIT_NMI exit.
>>>>
>>>> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
>>>>     (e20bbd3d and related commits)
>>>>
>>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
>>>> ---
>>>>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
>>>>  include/hw/ppc/spapr.h |    1 +
>>>>  target/ppc/kvm.c       |   16 ++++++++++++++++
>>>>  target/ppc/kvm_ppc.h   |    2 ++
>>>>  4 files changed, 41 insertions(+)
>>>>
>>
>> [...]
>>
>>>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
>>>> index 2427c8e..a593448 100644
>>>> --- a/target/ppc/kvm.c
>>>> +++ b/target/ppc/kvm.c
>>>> @@ -1738,6 +1738,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
>>>>          ret = 0;
>>>>          break;
>>>>  
>>>> +    case KVM_EXIT_NMI:
>>>> +        DPRINTF("handle NMI exception\n");
>>>
>>> tracepoints are generally preferred to new DPRINTFs.
>>
>> I see DPRINTFs used in all other exit reasons in this function. Do you
>> want me to change this particular exit case to tracepoints? I think it
>> is better to keep this DPRINTF as of now and change all the DPRINTFs to
>> tracepoints in a separate patch set.
> 
> Ah, good point. 

imho not. The kvm.c already knows about traces (there are two) and even
if every other trace in kvm_arch_handle_exit() is DPRINTF (enabled all
at once), having at least one which can be enabled without QEMU
recompile and separately from the others is a small but nice bonus
before someone gets rid of DPRINTF.


> Tracepoints are generally preferred, but since
> DPRINTFs are in use here, stick with that (at some point it would be
> good to change the whole file, but that's out of scope here).


-- 
Alexey Kardashevskiy
IBM OzLabs, LTC Team

e-mail: aik@linux.ibm.com

Re: [Qemu-devel] [Qemu-ppc] [PATCH v7 3/6] target/ppc: Handle NMI guest exit

Posted by Greg Kurz 6 years, 10 months ago

On Fri, 5 Apr 2019 16:04:27 +1100
Alexey Kardashevskiy <aik@linux.ibm.com> wrote:

> On 05/04/2019 10:17, David Gibson wrote:
> > On Thu, Apr 04, 2019 at 02:40:45PM +0530, Aravinda Prasad wrote:  
> >>
> >>
> >> On Monday 25 March 2019 11:52 AM, David Gibson wrote:  
> >>> On Fri, Mar 22, 2019 at 12:03:58PM +0530, Aravinda Prasad wrote:  
> >>>> Memory error such as bit flips that cannot be corrected
> >>>> by hardware are passed on to the kernel for handling.
> >>>> If the memory address in error belongs to guest then
> >>>> the guest kernel is responsible for taking suitable action.
> >>>> Patch [1] enhances KVM to exit guest with exit reason
> >>>> set to KVM_EXIT_NMI in such cases. This patch handles
> >>>> KVM_EXIT_NMI exit.
> >>>>
> >>>> [1] https://www.spinics.net/lists/kvm-ppc/msg12637.html
> >>>>     (e20bbd3d and related commits)
> >>>>
> >>>> Signed-off-by: Aravinda Prasad <aravinda@linux.vnet.ibm.com>
> >>>> ---
> >>>>  hw/ppc/spapr_events.c  |   22 ++++++++++++++++++++++
> >>>>  include/hw/ppc/spapr.h |    1 +
> >>>>  target/ppc/kvm.c       |   16 ++++++++++++++++
> >>>>  target/ppc/kvm_ppc.h   |    2 ++
> >>>>  4 files changed, 41 insertions(+)
> >>>>  
> >>
> >> [...]
> >>  
> >>>> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> >>>> index 2427c8e..a593448 100644
> >>>> --- a/target/ppc/kvm.c
> >>>> +++ b/target/ppc/kvm.c
> >>>> @@ -1738,6 +1738,11 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
> >>>>          ret = 0;
> >>>>          break;
> >>>>  
> >>>> +    case KVM_EXIT_NMI:
> >>>> +        DPRINTF("handle NMI exception\n");  
> >>>
> >>> tracepoints are generally preferred to new DPRINTFs.  
> >>
> >> I see DPRINTFs used in all other exit reasons in this function. Do you
> >> want me to change this particular exit case to tracepoints? I think it
> >> is better to keep this DPRINTF as of now and change all the DPRINTFs to
> >> tracepoints in a separate patch set.  
> > 
> > Ah, good point.   
> 
> imho not. The kvm.c already knows about traces (there are two) and even
> if every other trace in kvm_arch_handle_exit() is DPRINTF (enabled all
> at once), having at least one which can be enabled without QEMU
> recompile and separately from the others is a small but nice bonus
> before someone gets rid of DPRINTF.
> 

Done.

https://patchwork.ozlabs.org/project/qemu-devel/list/?series=101141

> 
> > Tracepoints are generally preferred, but since
> > DPRINTFs are in use here, stick with that (at some point it would be
> > good to change the whole file, but that's out of scope here).  
> 
>

[Qemu-devel] [PATCH v7 1/6] ppc: spapr: Handle "ibm, nmi-register" and "ibm, nmi-interlock" RTAS calls
[Qemu-devel] [PATCH v7 2/6] Wrapper function to wait on condition for the main loop mutex
[Qemu-devel] [PATCH v7 3/6] target/ppc: Handle NMI guest exit
[Qemu-devel] [PATCH v7 4/6] target/ppc: Build rtas error log
[Qemu-devel] [PATCH v7 5/6] ppc: spapr: Enable FWNMI capability
[Qemu-devel] [PATCH v7 6/6] migration: Block migration while handling machine check