[PATCH v15 09/41] KVM: x86: Load guest FPU state when access XSAVE-managed MSRs

Sean Christopherson posted 41 patches 2 weeks, 5 days ago
[PATCH v15 09/41] KVM: x86: Load guest FPU state when access XSAVE-managed MSRs
Posted by Sean Christopherson 2 weeks, 5 days ago
Load the guest's FPU state if userspace is accessing MSRs whose values
are managed by XSAVES. Introduce two helpers, kvm_{get,set}_xstate_msr(),
to facilitate access to such MSRs.

If MSRs supported in kvm_caps.supported_xss are passed through to the
guest, the guest's values of those MSRs are swapped with the host's before
the vCPU exits to userspace, and again after it re-enters the kernel,
before the next VM-Entry.

Because the modified code is also used for the KVM_GET_MSRS device ioctl(),
explicitly check @vcpu is non-null before attempting to load guest state.
The XSAVE-managed MSRs cannot be retrieved via the device ioctl() without
loading guest FPU state (which doesn't exist).

Note that guest_cpuid_has() is not queried as host userspace is allowed to
access MSRs that have not been exposed to the guest, e.g. it might do
KVM_SET_MSRS prior to KVM_SET_CPUID2.

The two helpers are placed here to make it explicit that accessing
XSAVE-managed MSRs requires special checks and handling to guarantee the
correctness of reads and writes of those MSRs.

Co-developed-by: Yang Weijiang <weijiang.yang@intel.com>
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
Tested-by: Mathias Krause <minipli@grsecurity.net>
Tested-by: John Allen <john.allen@amd.com>
Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
[sean: drop S_CET, add big comment, move accessors to x86.c]
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
 arch/x86/kvm/x86.c | 86 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 85 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c5e38d6943fe..a95ca2fbd3a9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -136,6 +136,9 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
 static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
 
 static DEFINE_MUTEX(vendor_module_lock);
+static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
+static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
+
 struct kvm_x86_ops kvm_x86_ops __read_mostly;
 
 #define KVM_X86_OP(func)					     \
@@ -3801,6 +3804,66 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
 	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
 }
 
+/*
+ * Returns true if the MSR in question is managed via XSTATE, i.e. is context
+ * switched with the rest of guest FPU state.  Note!  S_CET is _not_ context
+ * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS.
+ * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields,
+ * the value saved/restored via XSTATE is always the host's value.  That detail
+ * is _extremely_ important, as the guest's S_CET must _never_ be resident in
+ * hardware while executing in the host.  Loading guest values for U_CET and
+ * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to
+ * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower
+ * privilege levels, i.e. are effectively only consumed by userspace as well.
+ */
+static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr)
+{
+	if (!vcpu)
+		return false;
+
+	switch (msr) {
+	case MSR_IA32_U_CET:
+		return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ||
+		       guest_cpu_cap_has(vcpu, X86_FEATURE_IBT);
+	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
+		return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
+	default:
+		return false;
+	}
+}
+
+/*
+ * Lock and, if necessary, reload the guest FPU, then access an xstate MSR.
+ * For host-initiated accesses, the guest FPU is loaded up front, e.g. in
+ * __msr_io(); for guest-initiated accesses, it should already be loaded.
+ */
+static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu,
+						  struct msr_data *msr_info,
+						  int access)
+{
+	BUILD_BUG_ON(access != MSR_TYPE_R && access != MSR_TYPE_W);
+
+	KVM_BUG_ON(!is_xstate_managed_msr(vcpu, msr_info->index), vcpu->kvm);
+	KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm);
+
+	kvm_fpu_get();
+	if (access == MSR_TYPE_R)
+		rdmsrq(msr_info->index, msr_info->data);
+	else
+		wrmsrq(msr_info->index, msr_info->data);
+	kvm_fpu_put();
+}
+
+static __maybe_unused void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W);
+}
+
+static __maybe_unused void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+{
+	kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R);
+}
+
 int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	u32 msr = msr_info->index;
@@ -4551,11 +4614,25 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
 		    int (*do_msr)(struct kvm_vcpu *vcpu,
 				  unsigned index, u64 *data))
 {
+	bool fpu_loaded = false;
 	int i;
 
-	for (i = 0; i < msrs->nmsrs; ++i)
+	for (i = 0; i < msrs->nmsrs; ++i) {
+		/*
+		 * If userspace is accessing one or more XSTATE-managed MSRs,
+		 * temporarily load the guest's FPU state so that the guest's
+		 * MSR value(s) is resident in hardware and thus can be accessed
+		 * via RDMSR/WRMSR.
+		 */
+		if (!fpu_loaded && is_xstate_managed_msr(vcpu, entries[i].index)) {
+			kvm_load_guest_fpu(vcpu);
+			fpu_loaded = true;
+		}
 		if (do_msr(vcpu, entries[i].index, &entries[i].data))
 			break;
+	}
+	if (fpu_loaded)
+		kvm_put_guest_fpu(vcpu);
 
 	return i;
 }
@@ -5965,6 +6042,7 @@ static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl,
 	struct kvm_one_reg one_reg;
 	struct kvm_x86_reg_id *reg;
 	u64 __user *user_val;
+	bool load_fpu;
 	int r;
 
 	if (copy_from_user(&one_reg, argp, sizeof(one_reg)))
@@ -5991,12 +6069,18 @@ static int kvm_get_set_one_reg(struct kvm_vcpu *vcpu, unsigned int ioctl,
 
 	guard(srcu)(&vcpu->kvm->srcu);
 
+	load_fpu = is_xstate_managed_msr(vcpu, reg->index);
+	if (load_fpu)
+		kvm_load_guest_fpu(vcpu);
+
 	user_val = u64_to_user_ptr(one_reg.addr);
 	if (ioctl == KVM_GET_ONE_REG)
 		r = kvm_get_one_msr(vcpu, reg->index, user_val);
 	else
 		r = kvm_set_one_msr(vcpu, reg->index, user_val);
 
+	if (load_fpu)
+		kvm_put_guest_fpu(vcpu);
 	return r;
 }
 
-- 
2.51.0.384.g4c02a37b29-goog
Re: [PATCH v15 09/41] KVM: x86: Load guest FPU state when access XSAVE-managed MSRs
Posted by Binbin Wu 2 weeks, 2 days ago

On 9/13/2025 7:22 AM, Sean Christopherson wrote:
> Load the guest's FPU state if userspace is accessing MSRs whose values
> are managed by XSAVES. Introduce two helpers, kvm_{get,set}_xstate_msr(),
> to facilitate access to such kind of MSRs.
>
> If MSRs supported in kvm_caps.supported_xss are passed through to guest,
> the guest MSRs are swapped with host's before vCPU exits to userspace and
> after it reenters kernel before next VM-entry.
>
> Because the modified code is also used for the KVM_GET_MSRS device ioctl(),
> explicitly check @vcpu is non-null before attempting to load guest state.
> The XSAVE-managed MSRs cannot be retrieved via the device ioctl() without
> loading guest FPU state (which doesn't exist).
>
> Note that guest_cpuid_has() is not queried as host userspace is allowed to
> access MSRs that have not been exposed to the guest, e.g. it might do
> KVM_SET_MSRS prior to KVM_SET_CPUID2.
>
> The two helpers are put here in order to manifest accessing xsave-managed
> MSRs requires special check and handling to guarantee the correctness of
> read/write to the MSRs.
>
> Co-developed-by: Yang Weijiang <weijiang.yang@intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> Tested-by: Mathias Krause <minipli@grsecurity.net>
> Tested-by: John Allen <john.allen@amd.com>
> Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> [sean: drop S_CET, add big comment, move accessors to x86.c]
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>

Two nits below.

> ---
>   arch/x86/kvm/x86.c | 86 +++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 85 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c5e38d6943fe..a95ca2fbd3a9 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -136,6 +136,9 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
>   static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
>   
>   static DEFINE_MUTEX(vendor_module_lock);
> +static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
> +static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
> +
>   struct kvm_x86_ops kvm_x86_ops __read_mostly;
>   
>   #define KVM_X86_OP(func)					     \
> @@ -3801,6 +3804,66 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
>   	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
>   }
>   
> +/*
> + * Returns true if the MSR in question is managed via XSTATE, i.e. is context
> + * switched with the rest of guest FPU state.  Note!  S_CET is _not_ context
> + * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS.
> + * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields,
> + * the value saved/restored via XSTATE is always the host's value.  That detail
> + * is _extremely_ important, as the guest's S_CET must _never_ be resident in
> + * hardware while executing in the host.  Loading guest values for U_CET and
> + * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to
> + * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower
> + * privilegel levels, i.e. are effectively only consumed by userspace as well.
> + */
> +static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> +	if (!vcpu)
> +		return false;
> +
> +	switch (msr) {
> +	case MSR_IA32_U_CET:
> +		return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ||
> +		       guest_cpu_cap_has(vcpu, X86_FEATURE_IBT);
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
> +	default:
> +		return false;
> +	}
> +}
> +
> +/*
> + * Lock and/or reload guest FPU and access xstate MSRs. For accesses initiated


Lock is unconditional and reload is conditional.
"and/or" seems not accurate?

> + * by host, guest FPU is loaded in __msr_io(). For accesses initiated by guest,
> + * guest FPU should have been loaded already.
> + */
> +static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu,
> +						  struct msr_data *msr_info,
> +						  int access)
> +{
> +	BUILD_BUG_ON(access != MSR_TYPE_R && access != MSR_TYPE_W);
> +
> +	KVM_BUG_ON(!is_xstate_managed_msr(vcpu, msr_info->index), vcpu->kvm);
> +	KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm);
> +
> +	kvm_fpu_get();
> +	if (access == MSR_TYPE_R)
> +		rdmsrq(msr_info->index, msr_info->data);
> +	else
> +		wrmsrq(msr_info->index, msr_info->data);
> +	kvm_fpu_put();
> +}
> +
> +static __maybe_unused void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> +{
> +	kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W);
> +}
> +
> +static __maybe_unused void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> +{
> +	kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R);
> +}
> +
>   int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>   {
>   	u32 msr = msr_info->index;
> @@ -4551,11 +4614,25 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
>   		    int (*do_msr)(struct kvm_vcpu *vcpu,
>   				  unsigned index, u64 *data))
>   {
> +	bool fpu_loaded = false;
>   	int i;
>   
> -	for (i = 0; i < msrs->nmsrs; ++i)
> +	for (i = 0; i < msrs->nmsrs; ++i) {
> +		/*
> +		 * If userspace is accessing one or more XSTATE-managed MSRs,
> +		 * temporarily load the guest's FPU state so that the guest's
> +		 * MSR value(s) is resident in hardware, i.e. so that KVM can

Using "i.e." and "so that" together feels repetitive.[...]
Re: [PATCH v15 09/41] KVM: x86: Load guest FPU state when access XSAVE-managed MSRs
Posted by Sean Christopherson 2 weeks, 1 day ago
On Tue, Sep 16, 2025, Binbin Wu wrote:
> > +/*
> > + * Lock and/or reload guest FPU and access xstate MSRs. For accesses initiated
> 
> 
> Lock is unconditional and reload is conditional.
> "and/or" seems not accurate?

Agreed.  This?

/*
 * Lock and (re)load guest FPU and access xstate MSRs. For accesses initiated
 * by host, guest FPU is loaded in __msr_io(). For accesses initiated by guest,
 * guest FPU should have been loaded already.
 */

> 
> > + * by host, guest FPU is loaded in __msr_io(). For accesses initiated by guest,
> > + * guest FPU should have been loaded already.
> > + */
> > +static __always_inline void kvm_access_xstate_msr(struct kvm_vcpu *vcpu,
> > +						  struct msr_data *msr_info,
> > +						  int access)
> > +{
> > +	BUILD_BUG_ON(access != MSR_TYPE_R && access != MSR_TYPE_W);
> > +
> > +	KVM_BUG_ON(!is_xstate_managed_msr(vcpu, msr_info->index), vcpu->kvm);
> > +	KVM_BUG_ON(!vcpu->arch.guest_fpu.fpstate->in_use, vcpu->kvm);
> > +
> > +	kvm_fpu_get();
> > +	if (access == MSR_TYPE_R)
> > +		rdmsrq(msr_info->index, msr_info->data);
> > +	else
> > +		wrmsrq(msr_info->index, msr_info->data);
> > +	kvm_fpu_put();
> > +}
> > +
> > +static __maybe_unused void kvm_set_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > +{
> > +	kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_W);
> > +}
> > +
> > +static __maybe_unused void kvm_get_xstate_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > +{
> > +	kvm_access_xstate_msr(vcpu, msr_info, MSR_TYPE_R);
> > +}
> > +
> >   int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> >   {
> >   	u32 msr = msr_info->index;
> > @@ -4551,11 +4614,25 @@ static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
> >   		    int (*do_msr)(struct kvm_vcpu *vcpu,
> >   				  unsigned index, u64 *data))
> >   {
> > +	bool fpu_loaded = false;
> >   	int i;
> > -	for (i = 0; i < msrs->nmsrs; ++i)
> > +	for (i = 0; i < msrs->nmsrs; ++i) {
> > +		/*
> > +		 * If userspace is accessing one or more XSTATE-managed MSRs,
> > +		 * temporarily load the guest's FPU state so that the guest's
> > +		 * MSR value(s) is resident in hardware, i.e. so that KVM can
> 
> Using "i.e." and "so that" together feels repetitive.[...]

		/*
		 * If userspace is accessing one or more XSTATE-managed MSRs,
		 * temporarily load the guest's FPU state so that the guest's
		 * MSR value(s) is resident in hardware and thus can be accessed
		 * via RDMSR/WRMSR.
		 */
Re: [PATCH v15 09/41] KVM: x86: Load guest FPU state when access XSAVE-managed MSRs
Posted by Sean Christopherson 2 weeks ago
On Wed, Sep 17, 2025, Sean Christopherson wrote:
> On Tue, Sep 16, 2025, Binbin Wu wrote:
> > > +/*
> > > + * Lock and/or reload guest FPU and access xstate MSRs. For accesses initiated
> > 
> > 
> > Lock is unconditional and reload is conditional.
> > "and/or" seems not accurate?
> 
> Agreed.  This?
> 
> /*
>  * Lock andr (re)load guest FPU and access xstate MSRs. For accesses initiated
>  * by host, guest FPU is loaded in __msr_io(). For accesses initiated by guest,
>  * guest FPU should have been loaded already.
>  */

That's not very good either.

/*
 * Lock (and if necessary, re-load) the guest FPU, i.e. XSTATE, and access an
 * MSR that is managed via XSTATE.  Note, the caller is responsible for doing
 * the initial FPU load, this helper only ensures that guest state is resident
 * in hardware (the kernel can load its FPU state in IRQ context).
 */
Re: [PATCH v15 09/41] KVM: x86: Load guest FPU state when access XSAVE-managed MSRs
Posted by Binbin Wu 2 weeks, 1 day ago

On 9/16/2025 4:28 PM, Binbin Wu wrote:
>
>
> On 9/13/2025 7:22 AM, Sean Christopherson wrote:
>> Load the guest's FPU state if userspace is accessing MSRs whose values
>> are managed by XSAVES. Introduce two helpers, kvm_{get,set}_xstate_msr(),
>> to facilitate access to such kind of MSRs.
>>
>> If MSRs supported in kvm_caps.supported_xss are passed through to guest,
>> the guest MSRs are swapped with host's before vCPU exits to userspace and
>> after it reenters kernel before next VM-entry.
>>
>> Because the modified code is also used for the KVM_GET_MSRS device ioctl(),
>> explicitly check @vcpu is non-null before attempting to load guest state.
>> The XSAVE-managed MSRs cannot be retrieved via the device ioctl() without
>> loading guest FPU state (which doesn't exist).
>>
>> Note that guest_cpuid_has() is not queried as host userspace is allowed to
>> access MSRs that have not been exposed to the guest, e.g. it might do
>> KVM_SET_MSRS prior to KVM_SET_CPUID2.
>>
>> The two helpers are put here in order to manifest accessing xsave-managed
>> MSRs requires special check and handling to guarantee the correctness of
>> read/write to the MSRs.
>>
>> Co-developed-by: Yang Weijiang <weijiang.yang@intel.com>
>> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
>> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
>> Tested-by: Mathias Krause <minipli@grsecurity.net>
>> Tested-by: John Allen <john.allen@amd.com>
>> Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>> Signed-off-by: Chao Gao <chao.gao@intel.com>
>> [sean: drop S_CET, add big comment, move accessors to x86.c]
>> Signed-off-by: Sean Christopherson <seanjc@google.com>
>
> Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
>
> Two nits below.
>
>> ---
>>   arch/x86/kvm/x86.c | 86 +++++++++++++++++++++++++++++++++++++++++++++-
>>   1 file changed, 85 insertions(+), 1 deletion(-)
>>
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index c5e38d6943fe..a95ca2fbd3a9 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -136,6 +136,9 @@ static int __set_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
>>   static void __get_sregs2(struct kvm_vcpu *vcpu, struct kvm_sregs2 *sregs2);
>>     static DEFINE_MUTEX(vendor_module_lock);
>> +static void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
>> +static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
>> +
>>   struct kvm_x86_ops kvm_x86_ops __read_mostly;
>>     #define KVM_X86_OP(func)                         \
>> @@ -3801,6 +3804,66 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
>>       mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
>>   }
>>   +/*
>> + * Returns true if the MSR in question is managed via XSTATE, i.e. is context
>> + * switched with the rest of guest FPU state.  Note!  S_CET is _not_ context
>> + * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS.
>> + * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields,
>> + * the value saved/restored via XSTATE is always the host's value.  That detail
>> + * is _extremely_ important, as the guest's S_CET must _never_ be resident in
>> + * hardware while executing in the host.  Loading guest values for U_CET and
>> + * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to
>> + * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower
>> + * privilegel levels, i.e. are effectively only consumed by userspace as well.
>> + */

privilegel -> privilege


Re: [PATCH v15 09/41] KVM: x86: Load guest FPU state when access XSAVE-managed MSRs
Posted by Xiaoyao Li 2 weeks, 2 days ago
On 9/13/2025 7:22 AM, Sean Christopherson wrote:
> Load the guest's FPU state if userspace is accessing MSRs whose values
> are managed by XSAVES. Introduce two helpers, kvm_{get,set}_xstate_msr(),
> to facilitate access to such kind of MSRs.
> 
> If MSRs supported in kvm_caps.supported_xss are passed through to guest,
> the guest MSRs are swapped with host's before vCPU exits to userspace and
> after it reenters kernel before next VM-entry.
> 
> Because the modified code is also used for the KVM_GET_MSRS device ioctl(),
> explicitly check @vcpu is non-null before attempting to load guest state.
> The XSAVE-managed MSRs cannot be retrieved via the device ioctl() without
> loading guest FPU state (which doesn't exist).
> 
> Note that guest_cpuid_has() is not queried as host userspace is allowed to
> access MSRs that have not been exposed to the guest, e.g. it might do
> KVM_SET_MSRS prior to KVM_SET_CPUID2.
> 
> The two helpers are put here in order to manifest accessing xsave-managed
> MSRs requires special check and handling to guarantee the correctness of
> read/write to the MSRs.
> 
> Co-developed-by: Yang Weijiang <weijiang.yang@intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> Tested-by: Mathias Krause <minipli@grsecurity.net>
> Tested-by: John Allen <john.allen@amd.com>
> Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> [sean: drop S_CET, add big comment, move accessors to x86.c]
> Signed-off-by: Sean Christopherson <seanjc@google.com>

Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
Re: [PATCH v15 09/41] KVM: x86: Load guest FPU state when access XSAVE-managed MSRs
Posted by Xin Li 2 weeks, 3 days ago
On 9/12/2025 4:22 PM, Sean Christopherson wrote:
> Load the guest's FPU state if userspace is accessing MSRs whose values
> are managed by XSAVES. Introduce two helpers, kvm_{get,set}_xstate_msr(),
> to facilitate access to such kind of MSRs.
> 
> If MSRs supported in kvm_caps.supported_xss are passed through to guest,
> the guest MSRs are swapped with host's before vCPU exits to userspace and
> after it reenters kernel before next VM-entry.
> 
> Because the modified code is also used for the KVM_GET_MSRS device ioctl(),
> explicitly check @vcpu is non-null before attempting to load guest state.
> The XSAVE-managed MSRs cannot be retrieved via the device ioctl() without
> loading guest FPU state (which doesn't exist).
> 
> Note that guest_cpuid_has() is not queried as host userspace is allowed to
> access MSRs that have not been exposed to the guest, e.g. it might do
> KVM_SET_MSRS prior to KVM_SET_CPUID2.
> 
> The two helpers are put here in order to manifest accessing xsave-managed
> MSRs requires special check and handling to guarantee the correctness of
> read/write to the MSRs.
> 
> Co-developed-by: Yang Weijiang <weijiang.yang@intel.com>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
> Tested-by: Mathias Krause <minipli@grsecurity.net>
> Tested-by: John Allen <john.allen@amd.com>
> Tested-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Chao Gao <chao.gao@intel.com>
> [sean: drop S_CET, add big comment, move accessors to x86.c]
> Signed-off-by: Sean Christopherson <seanjc@google.com>


Reviewed-by: Xin Li (Intel) <xin@zytor.com>


> ---
>   arch/x86/kvm/x86.c | 86 +++++++++++++++++++++++++++++++++++++++++++++-
>   1 file changed, 85 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index c5e38d6943fe..a95ca2fbd3a9 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3801,6 +3804,66 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
>   	mark_page_dirty_in_slot(vcpu->kvm, ghc->memslot, gpa_to_gfn(ghc->gpa));
>   }
>   
> +/*
> + * Returns true if the MSR in question is managed via XSTATE, i.e. is context
> + * switched with the rest of guest FPU state.  Note!  S_CET is _not_ context
> + * switched via XSTATE even though it _is_ saved/restored via XSAVES/XRSTORS.
> + * Because S_CET is loaded on VM-Enter and VM-Exit via dedicated VMCS fields,
> + * the value saved/restored via XSTATE is always the host's value.  That detail
> + * is _extremely_ important, as the guest's S_CET must _never_ be resident in
> + * hardware while executing in the host.  Loading guest values for U_CET and
> + * PL[0-3]_SSP while executing in the kernel is safe, as U_CET is specific to
> + * userspace, and PL[0-3]_SSP are only consumed when transitioning to lower
> + * privilegel levels, i.e. are effectively only consumed by userspace as well.
> + */
> +static bool is_xstate_managed_msr(struct kvm_vcpu *vcpu, u32 msr)
> +{
> +	if (!vcpu)
> +		return false;
> +
> +	switch (msr) {
> +	case MSR_IA32_U_CET:
> +		return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) ||
> +		       guest_cpu_cap_has(vcpu, X86_FEATURE_IBT);
> +	case MSR_IA32_PL0_SSP ... MSR_IA32_PL3_SSP:
> +		return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
> +	default:
> +		return false;
> +	}
> +}

With this new version of is_xstate_managed_msr(), which checks against vcpu
capabilities instead of KVM, patch 9 of KVM FRED patches[1] no longer needs
to make any change to it.  And this is the only conflict when I apply KVM
FRED patches on top of this v15 mega-CET patch series.

[1] https://lore.kernel.org/lkml/20250829153149.2871901-10-xin@zytor.com/