[PATCH] hw/ppc/spapr_hcall: Return host mitigation characteristics in KVM mode

Gautam Menghani posted 1 patch 7 months, 1 week ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20250410104354.308714-1-gautam@linux.ibm.com
Maintainers: Nicholas Piggin <npiggin@gmail.com>, Daniel Henrique Barboza <danielhb413@gmail.com>, Harsh Prateek Bora <harshpb@linux.ibm.com>, Paolo Bonzini <pbonzini@redhat.com>
There is a newer version of this series
hw/ppc/spapr_hcall.c   | 6 ++++++
include/hw/ppc/spapr.h | 1 +
target/ppc/kvm.c       | 2 ++
3 files changed, 9 insertions(+)
[PATCH] hw/ppc/spapr_hcall: Return host mitigation characteristics in KVM mode
Posted by Gautam Menghani 7 months, 1 week ago
Currently, on a P10 KVM guest, the mitigations seen in the output of
"lscpu" command are different from the host. The reason for this
behaviour is that when the KVM guest makes the "h_get_cpu_characteristics"
hcall, QEMU does not consider the data it received from the host via the
KVM_PPC_GET_CPU_CHAR ioctl, and just uses the values present in
spapr->eff.caps[], which in turn just contain the default values set in
spapr_machine_class_init().

Fix this behaviour by making sure that h_get_cpu_characteristics()
returns the data received from the KVM ioctl for a KVM guest.

Perf impact:
With null syscall benchmark[1], ~45% improvement is observed.

1. Vanilla QEMU
$ ./null_syscall
132.19 ns     456.54 cycles

2. With this patch
$ ./null_syscall
91.18 ns     314.57 cycles

[1]: https://ozlabs.org/~anton/junkcode/null_syscall.c

Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
---
 hw/ppc/spapr_hcall.c   | 6 ++++++
 include/hw/ppc/spapr.h | 1 +
 target/ppc/kvm.c       | 2 ++
 3 files changed, 9 insertions(+)

diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 406aea4ecb..6aec4e22fc 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1415,6 +1415,12 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
     uint8_t count_cache_flush_assist = spapr_get_cap(spapr,
                                                      SPAPR_CAP_CCF_ASSIST);
 
+    if (kvm_enabled()) {
+        args[0] = spapr->chars.character;
+        args[1] = spapr->chars.behaviour;
+        return H_SUCCESS;
+    }
+
     switch (safe_cache) {
     case SPAPR_CAP_WORKAROUND:
         characteristics |= H_CPU_CHAR_L1D_FLUSH_ORI30;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index 39bd5bd5ed..b1e3ee1ae2 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -283,6 +283,7 @@ struct SpaprMachineState {
     Error *fwnmi_migration_blocker;
 
     SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
+    struct kvm_ppc_cpu_char chars;
 };
 
 #define H_SUCCESS         0
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index 992356cb75..fee6c5d131 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2511,6 +2511,7 @@ bool kvmppc_has_cap_xive(void)
 
 static void kvmppc_get_cpu_characteristics(KVMState *s)
 {
+    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
     struct kvm_ppc_cpu_char c;
     int ret;
 
@@ -2528,6 +2529,7 @@ static void kvmppc_get_cpu_characteristics(KVMState *s)
         return;
     }
 
+    spapr->chars = c;
     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
-- 
2.49.0
Re: [PATCH] hw/ppc/spapr_hcall: Return host mitigation characteristics in KVM mode
Posted by Philippe Mathieu-Daudé 7 months, 1 week ago
Hi Gautam,

On 10/4/25 12:43, Gautam Menghani wrote:
> Currently, on a P10 KVM guest, the mitigations seen in the output of
> "lscpu" command are different from the host. The reason for this
> behaviour is that when the KVM guest makes the "h_get_cpu_characteristics"
> hcall, QEMU does not consider the data it received from the host via the
> KVM_PPC_GET_CPU_CHAR ioctl, and just uses the values present in
> spapr->eff.caps[], which in turn just contain the default values set in
> spapr_machine_class_init().
> 
> Fix this behaviour by making sure that h_get_cpu_characteristics()
> returns the data received from the KVM ioctl for a KVM guest.
> 
> Perf impact:
> With null syscall benchmark[1], ~45% improvement is observed.
> 
> 1. Vanilla QEMU
> $ ./null_syscall
> 132.19 ns     456.54 cycles
> 
> 2. With this patch
> $ ./null_syscall
> 91.18 ns     314.57 cycles
> 
> [1]: https://ozlabs.org/~anton/junkcode/null_syscall.c
> 
> Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> ---
>   hw/ppc/spapr_hcall.c   | 6 ++++++
>   include/hw/ppc/spapr.h | 1 +
>   target/ppc/kvm.c       | 2 ++
>   3 files changed, 9 insertions(+)
> 
> diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> index 406aea4ecb..6aec4e22fc 100644
> --- a/hw/ppc/spapr_hcall.c
> +++ b/hw/ppc/spapr_hcall.c
> @@ -1415,6 +1415,12 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
>       uint8_t count_cache_flush_assist = spapr_get_cap(spapr,
>                                                        SPAPR_CAP_CCF_ASSIST);
>   
> +    if (kvm_enabled()) {
> +        args[0] = spapr->chars.character;
> +        args[1] = spapr->chars.behaviour;

If kvmppc_get_cpu_characteristics() call fails, we return random data.

Can't we just call kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR)
and kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c) here?

> +        return H_SUCCESS;
> +    }
> +
>       switch (safe_cache) {
>       case SPAPR_CAP_WORKAROUND:
>           characteristics |= H_CPU_CHAR_L1D_FLUSH_ORI30;
> diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> index 39bd5bd5ed..b1e3ee1ae2 100644
> --- a/include/hw/ppc/spapr.h
> +++ b/include/hw/ppc/spapr.h
> @@ -283,6 +283,7 @@ struct SpaprMachineState {
>       Error *fwnmi_migration_blocker;
>   
>       SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
> +    struct kvm_ppc_cpu_char chars;
>   };
>   
>   #define H_SUCCESS         0
> diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> index 992356cb75..fee6c5d131 100644
> --- a/target/ppc/kvm.c
> +++ b/target/ppc/kvm.c
> @@ -2511,6 +2511,7 @@ bool kvmppc_has_cap_xive(void)
>   
>   static void kvmppc_get_cpu_characteristics(KVMState *s)
>   {
> +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
>       struct kvm_ppc_cpu_char c;
>       int ret;
>   
> @@ -2528,6 +2529,7 @@ static void kvmppc_get_cpu_characteristics(KVMState *s)
>           return;
>       }
>   
> +    spapr->chars = c;
>       cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
>       cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
>       cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
Re: [PATCH] hw/ppc/spapr_hcall: Return host mitigation characteristics in KVM mode
Posted by Gautam Menghani 7 months ago
On Thu, Apr 10, 2025 at 02:46:47PM +0200, Philippe Mathieu-Daudé wrote:
> Hi Gautam,
> 
> On 10/4/25 12:43, Gautam Menghani wrote:
> > Currently, on a P10 KVM guest, the mitigations seen in the output of
> > "lscpu" command are different from the host. The reason for this
> > behaviour is that when the KVM guest makes the "h_get_cpu_characteristics"
> > hcall, QEMU does not consider the data it received from the host via the
> > KVM_PPC_GET_CPU_CHAR ioctl, and just uses the values present in
> > spapr->eff.caps[], which in turn just contain the default values set in
> > spapr_machine_class_init().
> > 
> > Fix this behaviour by making sure that h_get_cpu_characteristics()
> > returns the data received from the KVM ioctl for a KVM guest.
> > 
> > Perf impact:
> > With null syscall benchmark[1], ~45% improvement is observed.
> > 
> > 1. Vanilla QEMU
> > $ ./null_syscall
> > 132.19 ns     456.54 cycles
> > 
> > 2. With this patch
> > $ ./null_syscall
> > 91.18 ns     314.57 cycles
> > 
> > [1]: https://ozlabs.org/~anton/junkcode/null_syscall.c
> > 
> > Signed-off-by: Gautam Menghani <gautam@linux.ibm.com>
> > ---
> >   hw/ppc/spapr_hcall.c   | 6 ++++++
> >   include/hw/ppc/spapr.h | 1 +
> >   target/ppc/kvm.c       | 2 ++
> >   3 files changed, 9 insertions(+)
> > 
> > diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
> > index 406aea4ecb..6aec4e22fc 100644
> > --- a/hw/ppc/spapr_hcall.c
> > +++ b/hw/ppc/spapr_hcall.c
> > @@ -1415,6 +1415,12 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
> >       uint8_t count_cache_flush_assist = spapr_get_cap(spapr,
> >                                                        SPAPR_CAP_CCF_ASSIST);
> > +    if (kvm_enabled()) {
> > +        args[0] = spapr->chars.character;
> > +        args[1] = spapr->chars.behaviour;
> 
> If kvmppc_get_cpu_characteristics() call fails, we return random data.
> 
> Can't we just call kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR)
> and kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c) here?
> 

To handle the IOCTL failure problem, we can make these changes on
top of the main patch:


diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
index 3f7882ab34..d6db1bdab8 100644
--- a/hw/ppc/spapr_hcall.c
+++ b/hw/ppc/spapr_hcall.c
@@ -1402,7 +1402,7 @@ static target_ulong h_get_cpu_characteristics(PowerPCCPU *cpu,
     uint8_t count_cache_flush_assist = spapr_get_cap(spapr,
                                                      SPAPR_CAP_CCF_ASSIST);
 
-    if (kvm_enabled()) {
+    if (kvm_enabled() && spapr->chars.character) {
         args[0] = spapr->chars.character;
         args[1] = spapr->chars.behaviour;
         return H_SUCCESS;
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index ad47b70799..4f64d392a8 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -2500,7 +2500,7 @@ bool kvmppc_has_cap_xive(void)
 static void kvmppc_get_cpu_characteristics(KVMState *s)
 {
     SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
-    struct kvm_ppc_cpu_char c;
+    struct kvm_ppc_cpu_char c = {0};
     int ret;
 
     /* Assume broken */
@@ -2510,11 +2510,11 @@ static void kvmppc_get_cpu_characteristics(KVMState *s)
 
     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
     if (!ret) {
-        return;
+        goto err;
     }
     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
     if (ret < 0) {
-        return;
+        goto err;
     }
 
     spapr->chars = c;
@@ -2523,6 +2523,11 @@ static void kvmppc_get_cpu_characteristics(KVMState *s)
     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
     cap_ppc_count_cache_flush_assist =
         parse_cap_ppc_count_cache_flush_assist(c);
+
+    return;
+
+err:
+    memset(&(spapr->chars), 0, sizeof(struct kvm_ppc_cpu_char));
 }


This change will preserve the existing behaviour when the IOCTL fails.
I'll send a v2 if this looks OK?

Thanks,
Gautam

> > +        return H_SUCCESS;
> > +    }
> > +
> >       switch (safe_cache) {
> >       case SPAPR_CAP_WORKAROUND:
> >           characteristics |= H_CPU_CHAR_L1D_FLUSH_ORI30;
> > diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
> > index 39bd5bd5ed..b1e3ee1ae2 100644
> > --- a/include/hw/ppc/spapr.h
> > +++ b/include/hw/ppc/spapr.h
> > @@ -283,6 +283,7 @@ struct SpaprMachineState {
> >       Error *fwnmi_migration_blocker;
> >       SpaprWatchdog wds[WDT_MAX_WATCHDOGS];
> > +    struct kvm_ppc_cpu_char chars;
> >   };
> >   #define H_SUCCESS         0
> > diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
> > index 992356cb75..fee6c5d131 100644
> > --- a/target/ppc/kvm.c
> > +++ b/target/ppc/kvm.c
> > @@ -2511,6 +2511,7 @@ bool kvmppc_has_cap_xive(void)
> >   static void kvmppc_get_cpu_characteristics(KVMState *s)
> >   {
> > +    SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine());
> >       struct kvm_ppc_cpu_char c;
> >       int ret;
> > @@ -2528,6 +2529,7 @@ static void kvmppc_get_cpu_characteristics(KVMState *s)
> >           return;
> >       }
> > +    spapr->chars = c;
> >       cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
> >       cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
> >       cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
>