When updating guest CPUID entries to emulate runtime behavior, e.g. when
the guest enables a CR4-based feature that is tied to a CPUID flag, also
update the vCPU's cpu_caps accordingly. This will allow replacing all
usage of guest_cpuid_has() with guest_cpu_cap_has().
Take care not to update guest capabilities when KVM is updating CPUID
entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
bogus CPUID information. No extra call to update cpu_caps is needed as
the cpu_caps are initialized from the incoming guest CPUID, i.e. will
automatically get the updated values.
Note, none of the features in question use guest_cpu_cap_has() at this
time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
1 file changed, 34 insertions(+), 14 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 36bd04030989..37a991439fe6 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
}
+static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+ struct kvm_cpuid_entry2 *entry,
+ unsigned int x86_feature,
+ bool has_feature)
+{
+ if (entry)
+ cpuid_entry_change(entry, x86_feature, has_feature);
+
+ if (vcpu)
+ guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+}
+
static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
int nent)
{
struct kvm_cpuid_entry2 *best;
+ struct kvm_vcpu *caps = vcpu;
+
+ /*
+ * Don't update vCPU capabilities if KVM is updating CPUID entries that
+ * are coming in from userspace!
+ */
+ if (entries != vcpu->arch.cpuid_entries)
+ caps = NULL;
best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
- if (best) {
- /* Update OSXSAVE bit */
- if (boot_cpu_has(X86_FEATURE_XSAVE))
- cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
+
+ if (boot_cpu_has(X86_FEATURE_XSAVE))
+ kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
- cpuid_entry_change(best, X86_FEATURE_APIC,
- vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
+ kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
+ vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
- if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
- cpuid_entry_change(best, X86_FEATURE_MWAIT,
- vcpu->arch.ia32_misc_enable_msr &
- MSR_IA32_MISC_ENABLE_MWAIT);
- }
+ if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
+ kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
+ vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
best = cpuid_entry2_find(entries, nent, 7, 0);
- if (best && boot_cpu_has(X86_FEATURE_PKU))
- cpuid_entry_change(best, X86_FEATURE_OSPKE,
- kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
+ if (boot_cpu_has(X86_FEATURE_PKU))
+ kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
+ kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
best = cpuid_entry2_find(entries, nent, 0xD, 0);
if (best)
@@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* Reset guest capabilities to userspace's guest CPUID definition, i.e.
* honor userspace's definition for features that don't require KVM or
* hardware management/support (or that KVM simply doesn't care about).
+ *
+ * Note, KVM has already done runtime updates on guest CPUID, i.e. this
+ * will also correctly set runtime features in guest CPU capabilities.
*/
for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
const struct cpuid_reg cpuid = reverse_cpuid[i];
--
2.42.0.869.gea05f2083d-goog
On Fri, 2023-11-10 at 15:55 -0800, Sean Christopherson wrote:
> When updating guest CPUID entries to emulate runtime behavior, e.g. when
> the guest enables a CR4-based feature that is tied to a CPUID flag, also
> update the vCPU's cpu_caps accordingly. This will allow replacing all
> usage of guest_cpuid_has() with guest_cpu_cap_has().
>
> Take care not to update guest capabilities when KVM is updating CPUID
> entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> bogus CPUID information. No extra call to update cpu_caps is needed as
> the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> automatically get the updated values.
>
> Note, none of the features in question use guest_cpu_cap_has() at this
> time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
> 1 file changed, 34 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 36bd04030989..37a991439fe6 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
> return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> }
>
> +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> + struct kvm_cpuid_entry2 *entry,
> + unsigned int x86_feature,
> + bool has_feature)
> +{
> + if (entry)
> + cpuid_entry_change(entry, x86_feature, has_feature);
> +
> + if (vcpu)
> + guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> +}
> +
> static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> int nent)
> {
> struct kvm_cpuid_entry2 *best;
> + struct kvm_vcpu *caps = vcpu;
> +
> + /*
> + * Don't update vCPU capabilities if KVM is updating CPUID entries that
> + * are coming in from userspace!
> + */
> + if (entries != vcpu->arch.cpuid_entries)
> + caps = NULL;
I think that this should be decided by the caller. Just a boolean will suffice.
Or even better: since the userspace CPUID update is really not important in terms of performance,
why to special case it?
Even if these guest caps are later overwritten, I don't see why we
need to avoid updating them, and in fact introduce a small risk of them not being consistent
with the other cpu caps.
With this we can avoid having the 'cap' variable which is *very* confusing as well.
>
> best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> - if (best) {
> - /* Update OSXSAVE bit */
> - if (boot_cpu_has(X86_FEATURE_XSAVE))
> - cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> +
> + if (boot_cpu_has(X86_FEATURE_XSAVE))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>
> - cpuid_entry_change(best, X86_FEATURE_APIC,
> - vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> + vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>
> - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> - cpuid_entry_change(best, X86_FEATURE_MWAIT,
> - vcpu->arch.ia32_misc_enable_msr &
> - MSR_IA32_MISC_ENABLE_MWAIT);
> - }
> + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> + vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
>
> best = cpuid_entry2_find(entries, nent, 7, 0);
> - if (best && boot_cpu_has(X86_FEATURE_PKU))
> - cpuid_entry_change(best, X86_FEATURE_OSPKE,
> - kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> + if (boot_cpu_has(X86_FEATURE_PKU))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
> + kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
>
> best = cpuid_entry2_find(entries, nent, 0xD, 0);
> if (best)
> @@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> * Reset guest capabilities to userspace's guest CPUID definition, i.e.
> * honor userspace's definition for features that don't require KVM or
> * hardware management/support (or that KVM simply doesn't care about).
> + *
> + * Note, KVM has already done runtime updates on guest CPUID, i.e. this
> + * will also correctly set runtime features in guest CPU capabilities.
> */
> for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
> const struct cpuid_reg cpuid = reverse_cpuid[i];
Best regards,
Maxim Levitsky
On Sun, Nov 19, 2023 at 07:35:30PM +0200, Maxim Levitsky wrote:
> On Fri, 2023-11-10 at 15:55 -0800, Sean Christopherson wrote:
> > When updating guest CPUID entries to emulate runtime behavior, e.g. when
> > the guest enables a CR4-based feature that is tied to a CPUID flag, also
> > update the vCPU's cpu_caps accordingly. This will allow replacing all
> > usage of guest_cpuid_has() with guest_cpu_cap_has().
> >
> > Take care not to update guest capabilities when KVM is updating CPUID
> > entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> > bogus CPUID information. No extra call to update cpu_caps is needed as
> > the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> > automatically get the updated values.
> >
> > Note, none of the features in question use guest_cpu_cap_has() at this
> > time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
> >
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> > arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
> > 1 file changed, 34 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 36bd04030989..37a991439fe6 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
> > return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> > }
> >
> > +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> > + struct kvm_cpuid_entry2 *entry,
> > + unsigned int x86_feature,
> > + bool has_feature)
> > +{
> > + if (entry)
> > + cpuid_entry_change(entry, x86_feature, has_feature);
> > +
> > + if (vcpu)
> > + guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> > +}
> > +
> > static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > int nent)
> > {
> > struct kvm_cpuid_entry2 *best;
> > + struct kvm_vcpu *caps = vcpu;
> > +
> > + /*
> > + * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > + * are coming in from userspace!
> > + */
> > + if (entries != vcpu->arch.cpuid_entries)
> > + caps = NULL;
>
> I think that this should be decided by the caller. Just a boolean will suffice.
kvm_set_cpuid() calls this function only to validate/adjust the temporary
"entries" variable. While kvm_update_cpuid_runtime() calls it to do system
level changes.
So I kind of agree to make the caller fully awared, how about adding a
newly named wrapper for kvm_set_cpuid(), like:
static void kvm_adjust_cpuid_entry(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
int nent)
{
WARN_ON(entries == vcpu->arch.cpuid_entries);
__kvm_update_cpuid_runtime(vcpu, entries, nent);
}
>
> Or even better: since the userspace CPUID update is really not important in terms of performance,
> why to special case it?
>
> Even if these guest caps are later overwritten, I don't see why we
> need to avoid updating them, and in fact introduce a small risk of them not being consistent
IIUC, for kvm_set_cpuid() case, KVM may then fail the userspace cpuid setting,
so we can't change guest caps at this phase.
Thanks,
Yilun
> with the other cpu caps.
>
> With this we can avoid having the 'cap' variable which is *very* confusing as well.
>
>
> >
> > best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > - if (best) {
> > - /* Update OSXSAVE bit */
> > - if (boot_cpu_has(X86_FEATURE_XSAVE))
> > - cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> > +
> > + if (boot_cpu_has(X86_FEATURE_XSAVE))
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> > kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
> >
> > - cpuid_entry_change(best, X86_FEATURE_APIC,
> > - vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> > + vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> >
> > - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> > - cpuid_entry_change(best, X86_FEATURE_MWAIT,
> > - vcpu->arch.ia32_misc_enable_msr &
> > - MSR_IA32_MISC_ENABLE_MWAIT);
> > - }
> > + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> > + vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
> >
> > best = cpuid_entry2_find(entries, nent, 7, 0);
> > - if (best && boot_cpu_has(X86_FEATURE_PKU))
> > - cpuid_entry_change(best, X86_FEATURE_OSPKE,
> > - kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> > + if (boot_cpu_has(X86_FEATURE_PKU))
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
> > + kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> >
> > best = cpuid_entry2_find(entries, nent, 0xD, 0);
> > if (best)
> > @@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> > * Reset guest capabilities to userspace's guest CPUID definition, i.e.
> > * honor userspace's definition for features that don't require KVM or
> > * hardware management/support (or that KVM simply doesn't care about).
> > + *
> > + * Note, KVM has already done runtime updates on guest CPUID, i.e. this
> > + * will also correctly set runtime features in guest CPU capabilities.
> > */
> > for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
> > const struct cpuid_reg cpuid = reverse_cpuid[i];
>
>
> Best regards,
> Maxim Levitsky
>
>
On Fri, Nov 24, 2023, Xu Yilun wrote:
> On Sun, Nov 19, 2023 at 07:35:30PM +0200, Maxim Levitsky wrote:
> > On Fri, 2023-11-10 at 15:55 -0800, Sean Christopherson wrote:
> > > static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > > int nent)
> > > {
> > > struct kvm_cpuid_entry2 *best;
> > > + struct kvm_vcpu *caps = vcpu;
> > > +
> > > + /*
> > > + * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > > + * are coming in from userspace!
> > > + */
> > > + if (entries != vcpu->arch.cpuid_entries)
> > > + caps = NULL;
> >
> > I think that this should be decided by the caller. Just a boolean will suffice.
I strongly disagree. The _only_ time the caps should be updated is if
entries == vcpu->arch.cpuid_entries, and if entries == cpuid_entires than the caps
should _always_ be updated.
> kvm_set_cpuid() calls this function only to validate/adjust the temporary
> "entries" variable. While kvm_update_cpuid_runtime() calls it to do system
> level changes.
>
> So I kind of agree to make the caller fully awared, how about adding a
> newly named wrapper for kvm_set_cpuid(), like:
>
>
> static void kvm_adjust_cpuid_entry(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> int nent)
>
> {
> WARN_ON(entries == vcpu->arch.cpuid_entries);
> __kvm_update_cpuid_runtime(vcpu, entries, nent);
But taking it a step further, we end up with
WARN_ON_ONCE(update_caps != (entries == vcpu->arch.cpuid_entries));
which is silly since any bugs that would result in the WARN firing can be avoided
by doing:
update_caps = entries == vcpu->arch.cpuid_entries;
which eventually distils down to the code I posted.
> > Or even better: since the userspace CPUID update is really not important in
> > terms of performance, why to special case it?
> >
> > Even if these guest caps are later overwritten, I don't see why we need to
> > avoid updating them, and in fact introduce a small risk of them not being
> > consistent
>
> IIUC, for kvm_set_cpuid() case, KVM may then fail the userspace cpuid setting,
> so we can't change guest caps at this phase.
> Or even better: since the userspace CPUID update is really not important in
> terms of performance, why to special case it?
Yep, and sadly __kvm_update_cpuid_runtime() *must* be invoked before kvm_set_cpuid()
is guaranteed to succeed because the whole point is to massage guest CPUID before
checking for divergences.
> > With this we can avoid having the 'cap' variable which is *very* confusing as well.
I agree the "caps" variable is confusing, but it's the least awful option I see.
The alternatives I can think of are:
1. Update a dummy caps array
2. Take a snapshot of the caps and restore them
3. Have separate paths for updated guest CPUID versus guest caps
#1 would require passing a "u32 *" to guest_cpu_cap_change() (or an equivalent),
which I really, really don't want to do. It' also a waste of cycles, and I'm
skeptical that it would be any less confusing than the proposed code.
#2 increases the complexity of kvm_set_cpuid() by introducing recovery paths, i.e.
adds more things that can break, and again is wasteful (though copying ~100 bytes
or so in a slow path isn't a big deal).
#3 would create unnecessary maintenance burden as we'd have to ensure any changes
hit both paths.
On Mon, Nov 27, 2023 at 04:43:45PM -0800, Sean Christopherson wrote:
> On Fri, Nov 24, 2023, Xu Yilun wrote:
> > On Sun, Nov 19, 2023 at 07:35:30PM +0200, Maxim Levitsky wrote:
> > > On Fri, 2023-11-10 at 15:55 -0800, Sean Christopherson wrote:
> > > > static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > > > int nent)
> > > > {
> > > > struct kvm_cpuid_entry2 *best;
> > > > + struct kvm_vcpu *caps = vcpu;
> > > > +
> > > > + /*
> > > > + * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > > > + * are coming in from userspace!
> > > > + */
> > > > + if (entries != vcpu->arch.cpuid_entries)
> > > > + caps = NULL;
> > >
> > > I think that this should be decided by the caller. Just a boolean will suffice.
>
> I strongly disagree. The _only_ time the caps should be updated is if
> entries == vcpu->arch.cpuid_entries, and if entries == cpuid_entires than the caps
> should _always_ be updated.
>
> > kvm_set_cpuid() calls this function only to validate/adjust the temporary
> > "entries" variable. While kvm_update_cpuid_runtime() calls it to do system
> > level changes.
> >
> > So I kind of agree to make the caller fully awared, how about adding a
> > newly named wrapper for kvm_set_cpuid(), like:
> >
> >
> > static void kvm_adjust_cpuid_entry(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > int nent)
> >
> > {
> > WARN_ON(entries == vcpu->arch.cpuid_entries);
> > __kvm_update_cpuid_runtime(vcpu, entries, nent);
>
> But taking it a step further, we end up with
>
> WARN_ON_ONCE(update_caps != (entries == vcpu->arch.cpuid_entries));
>
> which is silly since any bugs that would result in the WARN firing can be avoided
> by doing:
>
> update_caps = entries == vcpu->arch.cpuid_entries;
>
> which eventually distils down to the code I posted.
OK, I agree with you.
My initial idea is to make developers easier to recognize what is
happening by name, without looking into __kvm_update_cpuid_runtime().
But it seems causing other subtle confuse and wastes cycles. Maybe the
comment is already good enough for developers.
Thanks,
Yilun
On 11/11/2023 7:55 AM, Sean Christopherson wrote:
> When updating guest CPUID entries to emulate runtime behavior, e.g. when
> the guest enables a CR4-based feature that is tied to a CPUID flag, also
> update the vCPU's cpu_caps accordingly. This will allow replacing all
> usage of guest_cpuid_has() with guest_cpu_cap_has().
>
> Take care not to update guest capabilities when KVM is updating CPUID
> entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> bogus CPUID information. No extra call to update cpu_caps is needed as
> the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> automatically get the updated values.
>
> Note, none of the features in question use guest_cpu_cap_has() at this
> time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
> 1 file changed, 34 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 36bd04030989..37a991439fe6 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
> return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> }
>
> +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> + struct kvm_cpuid_entry2 *entry,
> + unsigned int x86_feature,
> + bool has_feature)
> +{
> + if (entry)
> + cpuid_entry_change(entry, x86_feature, has_feature);
> +
> + if (vcpu)
> + guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> +}
> +
> static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> int nent)
> {
> struct kvm_cpuid_entry2 *best;
> + struct kvm_vcpu *caps = vcpu;
> +
> + /*
> + * Don't update vCPU capabilities if KVM is updating CPUID entries that
> + * are coming in from userspace!
> + */
> + if (entries != vcpu->arch.cpuid_entries)
> + caps = NULL;
Nit, why here we use caps instead of vcpu? Looks a bit weird.
>
> best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> - if (best) {
> - /* Update OSXSAVE bit */
> - if (boot_cpu_has(X86_FEATURE_XSAVE))
> - cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> +
> + if (boot_cpu_has(X86_FEATURE_XSAVE))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>
> - cpuid_entry_change(best, X86_FEATURE_APIC,
> - vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> + vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>
> - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> - cpuid_entry_change(best, X86_FEATURE_MWAIT,
> - vcpu->arch.ia32_misc_enable_msr &
> - MSR_IA32_MISC_ENABLE_MWAIT);
> - }
> + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> + vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
> 80 characters?
>
> best = cpuid_entry2_find(entries, nent, 7, 0);
> - if (best && boot_cpu_has(X86_FEATURE_PKU))
> - cpuid_entry_change(best, X86_FEATURE_OSPKE,
> - kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> + if (boot_cpu_has(X86_FEATURE_PKU))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
> + kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
>
> best = cpuid_entry2_find(entries, nent, 0xD, 0);
> if (best)
> @@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> * Reset guest capabilities to userspace's guest CPUID definition, i.e.
> * honor userspace's definition for features that don't require KVM or
> * hardware management/support (or that KVM simply doesn't care about).
> + *
> + * Note, KVM has already done runtime updates on guest CPUID, i.e. this
> + * will also correctly set runtime features in guest CPU capabilities.
> */
> for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
> const struct cpuid_reg cpuid = reverse_cpuid[i];
Reviewed-by: Yang Weijiang <weijiang.yang@intel.com>
On Thu, Nov 16, 2023, Weijiang Yang wrote:
> On 11/11/2023 7:55 AM, Sean Christopherson wrote:
> > static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > int nent)
> > {
> > struct kvm_cpuid_entry2 *best;
> > + struct kvm_vcpu *caps = vcpu;
> > +
> > + /*
> > + * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > + * are coming in from userspace!
> > + */
> > + if (entries != vcpu->arch.cpuid_entries)
> > + caps = NULL;
>
> Nit, why here we use caps instead of vcpu? Looks a bit weird.
See my response to Robert: https://lore.kernel.org/all/9395d416-cc5c-536d-641e-ffd971b682d1@gmail.com
> > best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > - if (best) {
> > - /* Update OSXSAVE bit */
> > - if (boot_cpu_has(X86_FEATURE_XSAVE))
> > - cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> > +
> > + if (boot_cpu_has(X86_FEATURE_XSAVE))
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> > kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
> > - cpuid_entry_change(best, X86_FEATURE_APIC,
> > - vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> > + vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> > - cpuid_entry_change(best, X86_FEATURE_MWAIT,
> > - vcpu->arch.ia32_misc_enable_msr &
> > - MSR_IA32_MISC_ENABLE_MWAIT);
> > - }
> > + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> > + vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
>
> > 80 characters?
Hmm, yeah, I suppose.
On 11/11/2023 7:55 AM, Sean Christopherson wrote:
> When updating guest CPUID entries to emulate runtime behavior, e.g. when
> the guest enables a CR4-based feature that is tied to a CPUID flag, also
> update the vCPU's cpu_caps accordingly. This will allow replacing all
> usage of guest_cpuid_has() with guest_cpu_cap_has().
>
> Take care not to update guest capabilities when KVM is updating CPUID
> entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> bogus CPUID information. No extra call to update cpu_caps is needed as
> the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> automatically get the updated values.
>
> Note, none of the features in question use guest_cpu_cap_has() at this
> time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
> ---
> arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
> 1 file changed, 34 insertions(+), 14 deletions(-)
>
> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> index 36bd04030989..37a991439fe6 100644
> --- a/arch/x86/kvm/cpuid.c
> +++ b/arch/x86/kvm/cpuid.c
> @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
> return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> }
>
> +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> + struct kvm_cpuid_entry2 *entry,
> + unsigned int x86_feature,
> + bool has_feature)
> +{
> + if (entry)
> + cpuid_entry_change(entry, x86_feature, has_feature);
> +
> + if (vcpu)
> + guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> +}
> +
> static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> int nent)
> {
> struct kvm_cpuid_entry2 *best;
> + struct kvm_vcpu *caps = vcpu;
u32 *caps = vcpu->arch.cpu_caps;
and update guest_cpu_cap_set(), guest_cpu_cap_clear(), guest_cpu_cap_change()
and guest_cpu_cap_restrict() to pass in vcpu->arch.cpu_caps instead of vcpu,
since all of them merely refer to vcpu cap, rather than whole vcpu info.
Or, for simple change, here rename variable name "caps" --> "vcpu", to less
reading confusion.
> +
> + /*
> + * Don't update vCPU capabilities if KVM is updating CPUID entries that
> + * are coming in from userspace!
> + */
> + if (entries != vcpu->arch.cpuid_entries)
> + caps = NULL;
>
> best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> - if (best) {
> - /* Update OSXSAVE bit */
> - if (boot_cpu_has(X86_FEATURE_XSAVE))
> - cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> +
> + if (boot_cpu_has(X86_FEATURE_XSAVE))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>
> - cpuid_entry_change(best, X86_FEATURE_APIC,
> - vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> + vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>
> - if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> - cpuid_entry_change(best, X86_FEATURE_MWAIT,
> - vcpu->arch.ia32_misc_enable_msr &
> - MSR_IA32_MISC_ENABLE_MWAIT);
> - }
> + if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_MWAIT,
> + vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT);
>
> best = cpuid_entry2_find(entries, nent, 7, 0);
> - if (best && boot_cpu_has(X86_FEATURE_PKU))
> - cpuid_entry_change(best, X86_FEATURE_OSPKE,
> - kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
> + if (boot_cpu_has(X86_FEATURE_PKU))
> + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSPKE,
> + kvm_is_cr4_bit_set(vcpu, X86_CR4_PKE));
>
> best = cpuid_entry2_find(entries, nent, 0xD, 0);
> if (best)
> @@ -353,6 +370,9 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
> * Reset guest capabilities to userspace's guest CPUID definition, i.e.
> * honor userspace's definition for features that don't require KVM or
> * hardware management/support (or that KVM simply doesn't care about).
> + *
> + * Note, KVM has already done runtime updates on guest CPUID, i.e. this
> + * will also correctly set runtime features in guest CPU capabilities.
> */
> for (i = 0; i < NR_KVM_CPU_CAPS; i++) {
> const struct cpuid_reg cpuid = reverse_cpuid[i];
On Mon, Nov 13, 2023, Robert Hoo wrote:
> On 11/11/2023 7:55 AM, Sean Christopherson wrote:
> > When updating guest CPUID entries to emulate runtime behavior, e.g. when
> > the guest enables a CR4-based feature that is tied to a CPUID flag, also
> > update the vCPU's cpu_caps accordingly. This will allow replacing all
> > usage of guest_cpuid_has() with guest_cpu_cap_has().
> >
> > Take care not to update guest capabilities when KVM is updating CPUID
> > entries that *may* become the vCPU's CPUID, e.g. if userspace tries to set
> > bogus CPUID information. No extra call to update cpu_caps is needed as
> > the cpu_caps are initialized from the incoming guest CPUID, i.e. will
> > automatically get the updated values.
> >
> > Note, none of the features in question use guest_cpu_cap_has() at this
> > time, i.e. aside from settings bits in cpu_caps, this is a glorified nop.
> >
> > Signed-off-by: Sean Christopherson <seanjc@google.com>
> > ---
> > arch/x86/kvm/cpuid.c | 48 +++++++++++++++++++++++++++++++-------------
> > 1 file changed, 34 insertions(+), 14 deletions(-)
> >
> > diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
> > index 36bd04030989..37a991439fe6 100644
> > --- a/arch/x86/kvm/cpuid.c
> > +++ b/arch/x86/kvm/cpuid.c
> > @@ -262,31 +262,48 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2 *entries, int nent)
> > return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
> > }
> > +static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
> > + struct kvm_cpuid_entry2 *entry,
> > + unsigned int x86_feature,
> > + bool has_feature)
> > +{
> > + if (entry)
> > + cpuid_entry_change(entry, x86_feature, has_feature);
> > +
> > + if (vcpu)
> > + guest_cpu_cap_change(vcpu, x86_feature, has_feature);
> > +}
> > +
> > static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *entries,
> > int nent)
> > {
> > struct kvm_cpuid_entry2 *best;
> > + struct kvm_vcpu *caps = vcpu;
>
> u32 *caps = vcpu->arch.cpu_caps;
> and update guest_cpu_cap_set(), guest_cpu_cap_clear(),
> guest_cpu_cap_change() and guest_cpu_cap_restrict() to pass in
> vcpu->arch.cpu_caps instead of vcpu, since all of them merely refer to vcpu
> cap, rather than whole vcpu info.
No, because then every caller would need extra code to pass vcpu->cpu_caps, and
passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'. That tradeoff
isn't worth making this one path slightly easier to read.
> Or, for simple change, here rename variable name "caps" --> "vcpu", to less
> reading confusion.
@vcpu is already defined and needs to be used in this function. See the comment
below.
I'm definitely open to a better name, though I would like to keep the name
relative short so that the line lengths of the callers is reasonable, e.g. would
prefer not to do vcpu_caps.
> > + /*
> > + * Don't update vCPU capabilities if KVM is updating CPUID entries that
> > + * are coming in from userspace!
> > + */
> > + if (entries != vcpu->arch.cpuid_entries)
> > + caps = NULL;
> > best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
> > - if (best) {
> > - /* Update OSXSAVE bit */
> > - if (boot_cpu_has(X86_FEATURE_XSAVE))
> > - cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
> > +
> > + if (boot_cpu_has(X86_FEATURE_XSAVE))
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
> > kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
> > - cpuid_entry_change(best, X86_FEATURE_APIC,
> > - vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
> > + kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
> > + vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
On 11/14/2023 9:48 PM, Sean Christopherson wrote:
> On Mon, Nov 13, 2023, Robert Hoo wrote:
...
>> u32 *caps = vcpu->arch.cpu_caps;
>> and update guest_cpu_cap_set(), guest_cpu_cap_clear(),
>> guest_cpu_cap_change() and guest_cpu_cap_restrict() to pass in
>> vcpu->arch.cpu_caps instead of vcpu, since all of them merely refer to vcpu
>> cap, rather than whole vcpu info.
>
> No, because then every caller would need extra code to pass vcpu->cpu_caps,
Emm, I don't understand this. I tried to modified and compiled, all need to do
is simply substitute "vcpu" with "vcpu->arch.cpu_caps" in calling. (at the end
is my diff based on this patch set)
> and
> passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'. That tradeoff
> isn't worth making this one path slightly easier to read.
My point is also from vulnerability, long term, since as a principle, we'd
better pass in param/info to a function of its necessity. e.g. cpuid_entry2_find().
Anyway, this is a less important point, shouldn't distract your focus.
This patch set's whole idea is good, I also felt confusion when initially
looking into vCPUID code and its complicated dependencies with each other and
KVM cap (or your word govern) ( and even Kernel govern and HW cap ?). With this
guest_cap[], the layered relationship can be much clearer, alone with fast guest
cap queries.
>
>> Or, for simple change, here rename variable name "caps" --> "vcpu", to less
>> reading confusion.
>
> @vcpu is already defined and needs to be used in this function. See the comment
> below.
>
> I'm definitely open to a better name, though I would like to keep the name
> relative short so that the line lengths of the callers is reasonable, e.g. would
> prefer not to do vcpu_caps.
>
>>> + /*
>>> + * Don't update vCPU capabilities if KVM is updating CPUID entries that
>>> + * are coming in from userspace!
>>> + */
>>> + if (entries != vcpu->arch.cpuid_entries)
>>> + caps = NULL;
>>> best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT);
>>> - if (best) {
>>> - /* Update OSXSAVE bit */
>>> - if (boot_cpu_has(X86_FEATURE_XSAVE))
>>> - cpuid_entry_change(best, X86_FEATURE_OSXSAVE,
>>> +
>>> + if (boot_cpu_has(X86_FEATURE_XSAVE))
>>> + kvm_update_feature_runtime(caps, best, X86_FEATURE_OSXSAVE,
>>> kvm_is_cr4_bit_set(vcpu, X86_CR4_OSXSAVE));
>>> - cpuid_entry_change(best, X86_FEATURE_APIC,
>>> - vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
>>> + kvm_update_feature_runtime(caps, best, X86_FEATURE_APIC,
>>> + vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 6407e5c45f20..3e8976705342 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -262,7 +262,7 @@ static u64 cpuid_get_supported_xcr0(struct kvm_cpuid_entry2
*entries, int nent)
return (best->eax | ((u64)best->edx << 32)) & kvm_caps.supported_xcr0;
}
-static __always_inline void kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
+static __always_inline void kvm_update_feature_runtime(u32 *guest_caps,
struct kvm_cpuid_entry2
*entry,
unsigned int x86_feature,
bool has_feature)
@@ -270,15 +270,15 @@ static __always_inline void
kvm_update_feature_runtime(struct kvm_vcpu *vcpu,
if (entry)
cpuid_entry_change(entry, x86_feature, has_feature);
- if (vcpu)
- guest_cpu_cap_change(vcpu, x86_feature, has_feature);
+ if (guest_caps)
+ guest_cpu_cap_change(guest_caps, x86_feature, has_feature);
}
static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct
kvm_cpuid_entry2 *entries,
int nent)
{
struct kvm_cpuid_entry2 *best;
- struct kvm_vcpu *caps = vcpu;
+ u32 *caps = vcpu->arch.cpu_caps;
/*
* Don't update vCPU capabilities if KVM is updating CPUID entries that
@@ -397,7 +397,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
*/
allow_gbpages = tdp_enabled ? boot_cpu_has(X86_FEATURE_GBPAGES) :
guest_cpu_cap_has(vcpu, X86_FEATURE_GBPAGES);
- guest_cpu_cap_change(vcpu, X86_FEATURE_GBPAGES, allow_gbpages);
+ guest_cpu_cap_change(vcpu->arch.cpu_caps, X86_FEATURE_GBPAGES,
allow_gbpages);
best = kvm_find_cpuid_entry(vcpu, 1);
if (best && apic) {
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 98694dfe062e..a3a0482fc514 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -183,39 +183,39 @@ static __always_inline bool guest_pv_has(struct kvm_vcpu
*vcpu,
return vcpu->arch.pv_cpuid.features & (1u << kvm_feature);
}
-static __always_inline void guest_cpu_cap_set(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_set(u32 *caps,
unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
reverse_cpuid_check(x86_leaf);
- vcpu->arch.cpu_caps[x86_leaf] |= __feature_bit(x86_feature);
+ caps[x86_leaf] |= __feature_bit(x86_feature);
}
-static __always_inline void guest_cpu_cap_clear(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_clear(u32 *caps,
unsigned int x86_feature)
{
unsigned int x86_leaf = __feature_leaf(x86_feature);
reverse_cpuid_check(x86_leaf);
- vcpu->arch.cpu_caps[x86_leaf] &= ~__feature_bit(x86_feature);
+ caps[x86_leaf] &= ~__feature_bit(x86_feature);
}
-static __always_inline void guest_cpu_cap_change(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_change(u32 *caps,
unsigned int x86_feature,
bool guest_has_cap)
{
if (guest_has_cap)
- guest_cpu_cap_set(vcpu, x86_feature);
+ guest_cpu_cap_set(caps, x86_feature);
else
- guest_cpu_cap_clear(vcpu, x86_feature);
+ guest_cpu_cap_clear(caps, x86_feature);
}
-static __always_inline void guest_cpu_cap_restrict(struct kvm_vcpu *vcpu,
+static __always_inline void guest_cpu_cap_restrict(u32 *caps,
unsigned int x86_feature)
{
if (!kvm_cpu_cap_has(x86_feature))
- guest_cpu_cap_clear(vcpu, x86_feature);
+ guest_cpu_cap_clear(caps, x86_feature);
}
static __always_inline bool guest_cpu_cap_has(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 6fe2d7bf4959..dd4ca07c3cd0 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4315,14 +4315,14 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* XSS on VM-Enter/VM-Exit. Failure to do so would effectively give
* the guest read/write access to the host's XSS.
*/
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVE);
- guest_cpu_cap_change(vcpu, X86_FEATURE_XSAVES,
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);
+ guest_cpu_cap_change(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES,
boot_cpu_has(X86_FEATURE_XSAVES) &&
guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE));
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_NRIPS);
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_TSCRATEMSR);
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_LBRV);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_NRIPS);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_TSCRATEMSR);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_LBRV);
/*
* Intercept VMLOAD if the vCPU mode is Intel in order to emulate that
@@ -4330,12 +4330,12 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* SVM on Intel is bonkers and extremely unlikely to work).
*/
if (!guest_cpuid_is_intel(vcpu))
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_V_VMSAVE_VMLOAD);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps,
X86_FEATURE_V_VMSAVE_VMLOAD);
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_PAUSEFILTER);
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_PFTHRESHOLD);
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_VGIF);
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_VNMI);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PAUSEFILTER);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PFTHRESHOLD);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VGIF);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VNMI);
svm_recalc_instruction_intercepts(vcpu, svm);
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7645945af5c5..c23c96dc24cf 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7752,13 +7752,13 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
* to the guest. XSAVES depends on CR4.OSXSAVE, and CR4.OSXSAVE can be
* set if and only if XSAVE is supported.
*/
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVE);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVE);
if (guest_cpu_cap_has(vcpu, X86_FEATURE_XSAVE))
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_XSAVES);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES);
else
- guest_cpu_cap_clear(vcpu, X86_FEATURE_XSAVES);
+ guest_cpu_cap_clear(vcpu->arch.cpu_caps, X86_FEATURE_XSAVES);
- guest_cpu_cap_restrict(vcpu, X86_FEATURE_VMX);
+ guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VMX);
vmx_setup_uret_msrs(vmx);
On Wed, Nov 15, 2023, Robert Hoo wrote: > On 11/14/2023 9:48 PM, Sean Christopherson wrote: > > On Mon, Nov 13, 2023, Robert Hoo wrote: > ... > > > u32 *caps = vcpu->arch.cpu_caps; > > > and update guest_cpu_cap_set(), guest_cpu_cap_clear(), > > > guest_cpu_cap_change() and guest_cpu_cap_restrict() to pass in > > > vcpu->arch.cpu_caps instead of vcpu, since all of them merely refer to vcpu > > > cap, rather than whole vcpu info. > > > > No, because then every caller would need extra code to pass > > vcpu->cpu_caps, > > Emm, I don't understand this. I tried to modified and compiled, all need to > do is simply substitute "vcpu" with "vcpu->arch.cpu_caps" in calling. (at > the end is my diff based on this patch set) Yes, and I'm saying that guest_cpu_cap_restrict(vcpu, X86_FEATURE_PAUSEFILTER); guest_cpu_cap_restrict(vcpu, X86_FEATURE_PFTHRESHOLD); guest_cpu_cap_restrict(vcpu, X86_FEATURE_VGIF); guest_cpu_cap_restrict(vcpu, X86_FEATURE_VNMI); is harder to read and write than this guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PAUSEFILTER); guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PFTHRESHOLD); guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VGIF); guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VNMI); a one-time search-replace is easy, but the extra boilerplate has a non-zero cost for every future developer/reader. > > and passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'. > > That tradeoff isn't worth making this one path slightly easier to read. > > My point is also from vulnerability, long term, since as a principle, we'd > better pass in param/info to a function of its necessity. Attempting to apply the principle of least privilege to low level C helpers is nonsensical. E.g. the helper can trivially get at the owning vcpu via container_of() (well, if not for typeof assertions not playing nice with arrays, but open coding container_of() is also trivial and illustrates the point). struct kvm_vcpu_arch *arch = (void *)caps - offsetof(struct kvm_vcpu_arch, cpu_caps); struct kvm_vcpu *vcpu = container_of(arch, struct kvm_vcpu, arch); if (!kvm_cpu_cap_has(x86_feature)) guest_cpu_cap_clear(vcpu, x86_feature); And the intent behind that principle is to improve security/robustness; what I'm saying is that passing in a 'u32 *" makes the overall implementation _less_ robust, as it opens up the possibilities of passing in an unsafe/incorrect pointer. E.g. a well-intentioned, not _that_ obviously broken example is: guest_cpu_cap_restrict(&vcpu->arch.cpu_caps[CPUID_1_ECX], X86_FEATURE_XSAVE); > e.g. cpuid_entry2_find(). The main reason cpuid_entry2_find() exists is because KVM checks the incoming array provided by KVM_SET_CPUID2, which is also the reason why __kvm_update_cpuid_runtime() takes an @entries array instead of just @vcpu.
On 11/15/2023 11:09 PM, Sean Christopherson wrote: ... >>> No, because then every caller would need extra code to pass >>> vcpu->cpu_caps, >> >> Emm, I don't understand this. I tried to modified and compiled, all need to >> do is simply substitute "vcpu" with "vcpu->arch.cpu_caps" in calling. (at >> the end is my diff based on this patch set) > > Yes, and I'm saying that > > guest_cpu_cap_restrict(vcpu, X86_FEATURE_PAUSEFILTER); > guest_cpu_cap_restrict(vcpu, X86_FEATURE_PFTHRESHOLD); > guest_cpu_cap_restrict(vcpu, X86_FEATURE_VGIF); > guest_cpu_cap_restrict(vcpu, X86_FEATURE_VNMI); > > is harder to read and write than this > > guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PAUSEFILTER); > guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_PFTHRESHOLD); > guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VGIF); > guest_cpu_cap_restrict(vcpu->arch.cpu_caps, X86_FEATURE_VNMI); > > a one-time search-replace is easy, but the extra boilerplate has a non-zero cost > for every future developer/reader. Hmm, I think this is trivial. And can be solved/eased by other means, e.g. Macro?. Rather than in the sacrifice of letting function's inside (easily) access those info it shouldn't. > >>> and passing 'u32 *' provides less type safety than 'struct kvm_vcpu *'. >>> That tradeoff isn't worth making this one path slightly easier to read. >> >> My point is also from vulnerability, long term, since as a principle, we'd >> better pass in param/info to a function of its necessity. > > Attempting to apply the principle of least privilege to low level C helpers is > nonsensical. E.g. the helper can trivially get at the owning vcpu via container_of() > (well, if not for typeof assertions not playing nice with arrays, but open coding > container_of() is also trivial and illustrates the point). > > struct kvm_vcpu_arch *arch = (void *)caps - offsetof(struct kvm_vcpu_arch, cpu_caps); > struct kvm_vcpu *vcpu = container_of(arch, struct kvm_vcpu, arch); > > if (!kvm_cpu_cap_has(x86_feature)) > guest_cpu_cap_clear(vcpu, x86_feature); > > And the intent behind that principle is to improve security/robustness; what I'm > saying is that passing in a 'u32 *" makes the overall implementation _less_ robust, > as it opens up the possibilities of passing in an unsafe/incorrect pointer. E.g. > a well-intentioned, not _that_ obviously broken example is: > > guest_cpu_cap_restrict(&vcpu->arch.cpu_caps[CPUID_1_ECX], X86_FEATURE_XSAVE); > >> e.g. cpuid_entry2_find(). > > The main reason cpuid_entry2_find() exists is because KVM checks the incoming > array provided by KVM_SET_CPUID2, which is also the reason why > __kvm_update_cpuid_runtime() takes an @entries array instead of just @vcpu. Thanks for detailed explanation, I understand your points deeper, though I would still prefer to honoring the principle if it was me to write the function. The concerns above can/should be addressed by other means. (If some really cannot be solved in C, i.e. more stringent type check, it's C to blame ;) but it on the other side offers those flexibility that other languages cannot, doesn't it?) Another pros of the principle is that, it's also a fence, prevent (at least raise the bar) people in the future from doing something that shouldn't be in the function, e.g. for his convenience to quickly fix a bug etc. Anyway, it's a dilemma, and I said it's a less important point for this great progress of vCPUID's implementation, thanks. Reviewed-by: Robert Hoo <robert.hoo.linux@gmail.com>
© 2016 - 2025 Red Hat, Inc.