Currently, guest and host FPUs share the same default features. However,
the CET supervisor xstate is the first feature that needs to be enabled
exclusively for guest FPUs. Enabling it for host FPUs leads to a waste of
24 bytes in the XSAVE buffer.
To support "guest-only" features, add a new structure to hold the
default features and sizes for guest FPUs to clearly differentiate them
from those for host FPUs.
An alternative approach is adding a guest_only_xfeatures member to
fpu_kernel_cfg and adding two helper functions to calculate the guest
default xfeatures and size. However, calculating these defaults at runtime
would introduce unnecessary overhead.
Note that, for now, the default features for guest and host FPUs remain the
same. This will change in a follow-up patch once guest permissions, default
xfeatures, and fpstate size are all converted to use the guest defaults.
Suggested-by: Chang S. Bae <chang.seok.bae@intel.com>
Signed-off-by: Chao Gao <chao.gao@intel.com>
---
v5:
Add a new vcpu_fpu_config instead of adding new members to
fpu_state_config (Chang)
Extract a helper to set default values (Chang)
---
arch/x86/include/asm/fpu/types.h | 43 ++++++++++++++++++++++++++++++++
arch/x86/kernel/fpu/core.c | 1 +
arch/x86/kernel/fpu/xstate.c | 29 ++++++++++++++++-----
3 files changed, 67 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/fpu/types.h b/arch/x86/include/asm/fpu/types.h
index 9f9ed406b179..769155a0401a 100644
--- a/arch/x86/include/asm/fpu/types.h
+++ b/arch/x86/include/asm/fpu/types.h
@@ -542,6 +542,48 @@ struct fpu_guest {
struct fpstate *fpstate;
};
+/*
+ * FPU state configuration data for fpu_guest.
+ * Initialized at boot time. Read only after init.
+ */
+struct vcpu_fpu_config {
+ /*
+ * @size:
+ *
+ * The default size of the register state buffer in guest FPUs.
+ * Includes all supported features except independent managed
+ * features and features which have to be requested by user space
+ * before usage.
+ */
+ unsigned int size;
+
+ /*
+ * @user_size:
+ *
+ * The default UABI size of the register state buffer in guest
+ * FPUs. Includes all supported user features except independent
+ * managed features and features which have to be requested by
+ * user space before usage.
+ */
+ unsigned int user_size;
+
+ /*
+ * @features:
+ *
+ * The default supported features bitmap in guest FPUs. Does not
+ * include independent managed features and features which have to
+ * be requested by user space before usage.
+ */
+ u64 features;
+
+ /*
+ * @user_features:
+ *
+ * Same as @features except only user xfeatures are included.
+ */
+ u64 user_features;
+};
+
/*
* FPU state configuration data. Initialized at boot time. Read only after init.
*/
@@ -597,5 +639,6 @@ struct fpu_state_config {
/* FPU state configuration information */
extern struct fpu_state_config fpu_kernel_cfg, fpu_user_cfg;
+extern struct vcpu_fpu_config guest_default_cfg;
#endif /* _ASM_X86_FPU_TYPES_H */
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 28ad7ec56eaa..25f13cc8ad92 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -36,6 +36,7 @@ DEFINE_PER_CPU(u64, xfd_state);
/* The FPU state configuration data for kernel and user space */
struct fpu_state_config fpu_kernel_cfg __ro_after_init;
struct fpu_state_config fpu_user_cfg __ro_after_init;
+struct vcpu_fpu_config guest_default_cfg __ro_after_init;
/*
* Represents the initial FPU state. It's mostly (but not completely) zeroes,
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 6f10f5490022..cdd1e51fb93e 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -738,6 +738,11 @@ static int __init init_xstate_size(void)
fpu_user_cfg.default_size =
xstate_calculate_size(fpu_user_cfg.default_features, false);
+ guest_default_cfg.size =
+ xstate_calculate_size(guest_default_cfg.features, compacted);
+ guest_default_cfg.user_size =
+ xstate_calculate_size(guest_default_cfg.user_features, false);
+
return 0;
}
@@ -766,6 +771,22 @@ static void __init fpu__init_disable_system_xstate(unsigned int legacy_size)
fpstate_reset(¤t->thread.fpu);
}
+static void __init init_default_features(u64 kernel_max_features, u64 user_max_features)
+{
+ u64 kfeatures = kernel_max_features;
+ u64 ufeatures = user_max_features;
+
+ /* Default feature sets should not include dynamic xfeatures. */
+ kfeatures &= ~XFEATURE_MASK_USER_DYNAMIC;
+ ufeatures &= ~XFEATURE_MASK_USER_DYNAMIC;
+
+ fpu_kernel_cfg.default_features = kfeatures;
+ fpu_user_cfg.default_features = ufeatures;
+
+ guest_default_cfg.features = kfeatures;
+ guest_default_cfg.user_features = ufeatures;
+}
+
/*
* Enable and initialize the xsave feature.
* Called once per system bootup.
@@ -837,12 +858,8 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
fpu_user_cfg.max_features = fpu_kernel_cfg.max_features;
fpu_user_cfg.max_features &= XFEATURE_MASK_USER_SUPPORTED;
- /* Clean out dynamic features from default */
- fpu_kernel_cfg.default_features = fpu_kernel_cfg.max_features;
- fpu_kernel_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
-
- fpu_user_cfg.default_features = fpu_user_cfg.max_features;
- fpu_user_cfg.default_features &= ~XFEATURE_MASK_USER_DYNAMIC;
+ /* Now, given maximum feature set, determine default values */
+ init_default_features(fpu_kernel_cfg.max_features, fpu_user_cfg.max_features);
/* Store it for paranoia check at the end */
xfeatures = fpu_kernel_cfg.max_features;
--
2.46.1
On 4/10/2025 12:24 AM, Chao Gao wrote:
>
> +struct vcpu_fpu_config guest_default_cfg __ro_after_init;
I just noticed that the patch set is missing the initialization of
guest_default_cfg.size = size (or legacy_size) in the following functions:
fpu__init_system_xstate_size_legacy()
fpu__init_disable_system_xstate()
Without that, it looks buggy when XSAVE is either unavailable or disabled.
Thanks,
Chang
On Thu, May 01, 2025 at 07:24:25AM -0700, Chang S. Bae wrote: >On 4/10/2025 12:24 AM, Chao Gao wrote: >> >> +struct vcpu_fpu_config guest_default_cfg __ro_after_init; > >I just noticed that the patch set is missing the initialization of >guest_default_cfg.size = size (or legacy_size) in the following functions: > > fpu__init_system_xstate_size_legacy() > fpu__init_disable_system_xstate() > >Without that, it looks buggy when XSAVE is either unavailable or disabled. Good catch. Will fix them.
On Thu, 2025-04-10 at 15:24 +0800, Chao Gao wrote: > + > + /* > + * @user_size: > + * > + * The default UABI size of the register state buffer in guest > + * FPUs. Includes all supported user features except independent > + * managed features and features which have to be requested by > + * user space before usage. > + */ > + unsigned int user_size; > + > + /* > + * @features: > + * > + * The default supported features bitmap in guest FPUs. Does not > + * include independent managed features and features which have to > + * be requested by user space before usage. > + */ > + u64 features; > + > + /* > + * @user_features: > + * > + * Same as @features except only user xfeatures are included. > + */ > + u64 user_features; > +}; Tracing through the code, it seems that fpu_user_cfg.default_features and guest_default_cfg.user_features are the same, leading to fpu_user_cfg.default_size and guest_default_cfg.user_size being also the same. In the later patches, it doesn't seem to change the "user" parts. These configurations end up controlling the default size and features that gets copied to userspace in KVM_SET_XSAVE. I guess today there is only one default size and feature set for xstate copied to userspace. The suggestion from Chang was that it makes the code more readable, but it seems like it also breaks apart a unified concept for no functional benefit. Maybe we don't need user_features or user_size here in vcpu_fpu_config? Or did I get lost somewhere along the way in all the twists and turns that features and sizes go through.
On Fri, Apr 25, 2025 at 06:52:59AM +0800, Edgecombe, Rick P wrote:
>On Thu, 2025-04-10 at 15:24 +0800, Chao Gao wrote:
>> +
>> + /*
>> + * @user_size:
>> + *
>> + * The default UABI size of the register state buffer in guest
>> + * FPUs. Includes all supported user features except independent
>> + * managed features and features which have to be requested by
>> + * user space before usage.
>> + */
>> + unsigned int user_size;
>> +
>> + /*
>> + * @features:
>> + *
>> + * The default supported features bitmap in guest FPUs. Does not
>> + * include independent managed features and features which have to
>> + * be requested by user space before usage.
>> + */
>> + u64 features;
>> +
>> + /*
>> + * @user_features:
>> + *
>> + * Same as @features except only user xfeatures are included.
>> + */
>> + u64 user_features;
>> +};
>
>Tracing through the code, it seems that fpu_user_cfg.default_features and
>guest_default_cfg.user_features are the same, leading to
>fpu_user_cfg.default_size and guest_default_cfg.user_size being also the same.
Right. This is primarily for readability and symmetry.
I slightly prefer __guest_fpstate_reset() in this series:
fpstate->size = guest_default_cfg.size;
fpstate->user_size = guest_default_cfg.user_size;
fpstate->xfeatures = guest_default_cfg.features;
fpstate->user_xfeatures = guest_default_cfg.user_features;
over this version:
fpstate->size = guest_default_cfg.size;
fpstate->xfeatures = guest_default_cfg.features;
/*
* use fpu_user_cfg for user_* settings for compatibility of exiting
* uAPIs.
*/
fpstate->user_size = fpu_user_cfg.user_size;
fpstate->user_xfeatures = fpu_user_cfg.default_features;
Referencing different structures for size/xfeatures and their user_*
counterparts is not elegant to me. The need for a comment indicates that
this chunk may cause confusion. And this pattern will repeat when
initializing fpu->guest_perm in fpstate_reset().
>
>In the later patches, it doesn't seem to change the "user" parts. These
>configurations end up controlling the default size and features that gets copied
>to userspace in KVM_SET_XSAVE. I guess today there is only one default size and
>feature set for xstate copied to userspace. The suggestion from Chang was that
>it makes the code more readable, but it seems like it also breaks apart a
>unified concept for no functional benefit.
In the future, the feature and size of the uABI buffer for guest FPUs may
differ from those of non-guest FPUs. Sean rejected the idea of saving/restoring
CET_S xstate in KVM partly because:
:Especially because another big negative is that not utilizing XSTATE bleeds into
:KVM's ABI. Userspace has to be told to manually save+restore MSRs instead of just
:letting KVM_{G,S}ET_XSAVE handle the state. And that will create a bit of a
:snafu if Linux does gain support for SSS.
*: https://lore.kernel.org/kvm/ZM1jV3UPL0AMpVDI@google.com/
[To be clear, it is not an issue caused by Chang's suggestion. v4 which adds
new members @guest_size @guest_default_features to fpu_state_config has the
same problem. i.e., fpu_user_cfg.guest_default_feaures is identical to
fpu_user_cfg.default_features, adding no functional benefit.]
>
>Maybe we don't need user_features or user_size here in vcpu_fpu_config? Or did I
I don't have a strong opinion on this. I am ok with dropping them. Do you have
a strong preference?
>get lost somewhere along the way in all the twists and turns that features and
>sizes go through.
No, your analysis is correct.
>
>
On Fri, 2025-04-25 at 16:24 +0800, Chao Gao wrote:
> >
> > In the later patches, it doesn't seem to change the "user" parts. These
> > configurations end up controlling the default size and features that gets
> > copied
> > to userspace in KVM_SET_XSAVE. I guess today there is only one default size
> > and
> > feature set for xstate copied to userspace. The suggestion from Chang was
> > that
> > it makes the code more readable, but it seems like it also breaks apart a
> > unified concept for no functional benefit.
>
> In the future, the feature and size of the uABI buffer for guest FPUs may
> differ from those of non-guest FPUs. Sean rejected the idea of
> saving/restoring
> CET_S xstate in KVM partly because:
>
> :Especially because another big negative is that not utilizing XSTATE bleeds
> into
> :KVM's ABI. Userspace has to be told to manually save+restore MSRs instead
> of just
> :letting KVM_{G,S}ET_XSAVE handle the state.
Hmm, interesting. I guess there are two things.
1. Should CET_S be part of KVM_GET_XSAVE instead of via MSRs ioctls? It never
was in the KVM CET patches.
2. A feature mask far away in the FPU code controls KVM's xsave ABI.
For (1), does any userspace depend on their not being supervisor features? (i.e.
tries to restore the guest FPU for emulation or something). There probably are
some advantages to keeping supervisor features out of it, or at least a separate
ioctl. (2) is an existing problem. But if we think KVM should have its own
feature set of bits for ABI purposes, it seems like maybe it should have some
dedicated consideration.
I'd think we should not try to address it in this series. Let's stick to what
the current CET KVM series needs. Changing KVM_GET_XSAVE behavior or cleanup
could be a separate series.
> And that will create a bit of a
> :snafu if Linux does gain support for SSS.
>
> *: https://lore.kernel.org/kvm/ZM1jV3UPL0AMpVDI@google.com/
I chatted with Xin about this a few weeks ago. It sounds like FRED bare metal
SSS will not need CET_S state, but it wasn't 100% clear.
>
On 4/25/2025 9:09 AM, Edgecombe, Rick P wrote:
>> And that will create a bit of a
>> :snafu if Linux does gain support for SSS.
>>
>> *:https://lore.kernel.org/kvm/ZM1jV3UPL0AMpVDI@google.com/
> I chatted with Xin about this a few weeks ago. It sounds like FRED bare metal
> SSS will not need CET_S state, but it wasn't 100% clear.
FRED reuses one CET_S MSR IA32_PL0_SSP, and give it an alias
IA32_FRED_SSP0.
Thanks!
Xin
On 4/27/2025 10:51 PM, Xin Li wrote:
> On 4/25/2025 9:09 AM, Edgecombe, Rick P wrote:
>>> And that will create a bit of a
>>> :snafu if Linux does gain support for SSS.
>>>
>>> *:https://lore.kernel.org/kvm/ZM1jV3UPL0AMpVDI@google.com/
>> I chatted with Xin about this a few weeks ago. It sounds like FRED
>> bare metal
>> SSS will not need CET_S state, but it wasn't 100% clear.
>
> FRED reuses one CET_S MSR IA32_PL0_SSP, and give it an alias
> IA32_FRED_SSP0.
Native use of IA32_FRED_SSP0 is very much like IA32_FRED_RSP0:
1) Both are per-task constants.
2) Both are only used for delivering events when running userspace.
IA32_FRED_RSP0 is set on return to userspace:
https://web.git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fe85ee391966c4cf3bfe1c405314e894c951f521
I suppose we'll likely apply the same approach to IA32_FRED_SSP0 if we
plan to enable SSS for the kernel. This won't add any extra maintenance
cost, as both x86 and KVM maintainers are well aware.
Thanks!
Xin
On Fri, Apr 25, 2025, Rick P Edgecombe wrote:
> On Fri, 2025-04-25 at 16:24 +0800, Chao Gao wrote:
> > >
> > > In the later patches, it doesn't seem to change the "user" parts. These
> > > configurations end up controlling the default size and features that gets
> > > copied
> > > to userspace in KVM_SET_XSAVE. I guess today there is only one default size
> > > and
> > > feature set for xstate copied to userspace. The suggestion from Chang was
> > > that
> > > it makes the code more readable, but it seems like it also breaks apart a
> > > unified concept for no functional benefit.
> >
> > In the future, the feature and size of the uABI buffer for guest FPUs may
> > differ from those of non-guest FPUs. Sean rejected the idea of
> > saving/restoring
> > CET_S xstate in KVM partly because:
> >
> > :Especially because another big negative is that not utilizing XSTATE bleeds
> > into
> > :KVM's ABI. Userspace has to be told to manually save+restore MSRs instead
> > of just
> > :letting KVM_{G,S}ET_XSAVE handle the state.
>
> Hmm, interesting. I guess there are two things.
> 1. Should CET_S be part of KVM_GET_XSAVE instead of via MSRs ioctls? It never
> was in the KVM CET patches.
> 2. A feature mask far away in the FPU code controls KVM's xsave ABI.
>
> For (1), does any userspace depend on their not being supervisor features? (i.e.
> tries to restore the guest FPU for emulation or something). There probably are
> some advantages to keeping supervisor features out of it, or at least a separate
> ioctl.
CET_S probably shouldn't be in XSAVE ABI, because that would technically leak
kernel state to userspace for the non-KVM use case. I assume the kernel has
bigger problems if CET_S is somehow tied to a userspace task.
For KVM, it's just the one MSR, and KVM needs to support save/restore of that MSR
no matter what, so supporting it via XSAVE would be more work, a bit sketchy, and
create yet another way for userspace to do weird things when saving/restoring vCPU
state.
> (2) is an existing problem. But if we think KVM should have its own
> feature set of bits for ABI purposes, it seems like maybe it should have some
> dedicated consideration.
Nah, don't bother. The kernel needs to solve the exact same problems for the
signal ABI, I don't see any reason to generate more work. From a validation
coverage perspective, I see a lot of value in shared code.
On Fri, 2025-04-25 at 16:48 -0700, Sean Christopherson wrote: > > (2) is an existing problem. But if we think KVM should have its own > > feature set of bits for ABI purposes, it seems like maybe it should have > > some > > dedicated consideration. > > Nah, don't bother. The kernel needs to solve the exact same problems for the > signal ABI, I don't see any reason to generate more work. From a validation > coverage perspective, I see a lot of value in shared code. Right, so there should be no need to keep a separate features and buffer size for KVM's xsave UABI, as this patch does. Let's just leave it using the core kernels UABI version.
On 4/28/2025 8:42 AM, Edgecombe, Rick P wrote: > > Right, so there should be no need to keep a separate features and buffer size > for KVM's xsave UABI, as this patch does. Let's just leave it using the core > kernels UABI version. Hmm, why so? As I see it, the vcpu->arch.guest_fpu structure is already exposed to KVM. This series doesn’t modify those structures (fpu_guest and fpstate), other than removing a dead field (patch 2). Both ->usersize and ->user_xfeatures fields are already exposed -- currently KVM just doesn’t reference them at all. All the changes introduced here are transparent to KVM. Organizing the initial values and wiring up guest_perm and fpstate are entirely internal to the x86 core, no?
On Mon, 2025-04-28 at 18:11 -0700, Chang S. Bae wrote: > On 4/28/2025 8:42 AM, Edgecombe, Rick P wrote: > > > > Right, so there should be no need to keep a separate features and buffer size > > for KVM's xsave UABI, as this patch does. Let's just leave it using the core > > kernels UABI version. > > Hmm, why so? > > As I see it, the vcpu->arch.guest_fpu structure is already exposed to > KVM. This series doesn’t modify those structures (fpu_guest and > fpstate), other than removing a dead field (patch 2). > > Both ->usersize and ->user_xfeatures fields are already exposed -- > currently KVM just doesn’t reference them at all. > > All the changes introduced here are transparent to KVM. Organizing the > initial values and wiring up guest_perm and fpstate are entirely > internal to the x86 core, no? This patch adds struct vcpu_fpu_config, with new fields user_size, user_features. Then those fields are used to configure the guest FPU, where today it just uses fpu_user_cfg.default_features, etc. KVM doesn't refer to any of those fields specifically, but since they are used to configure struct fpu_guest they become part of KVM's uABI. Per Sean, KVM's KVM_GET_XSAVE API won't differ from arch/x86's uABI behavior. There is (and will be) only one default user features and size. So what is the point of having a special guest version with identical values? Just use the single one for guest FPU and normal. Chao mentioned offline it was for symmetry. I don't want to make a big deal out of it, but it doesn't make sense to me. It made me wonder if there was some divergence in KVM and arch/x86 user features.
On 4/28/2025 7:50 PM, Edgecombe, Rick P wrote: > On Mon, 2025-04-28 at 18:11 -0700, Chang S. Bae wrote: >> On 4/28/2025 8:42 AM, Edgecombe, Rick P wrote: >>> >>> Right, so there should be no need to keep a separate features and buffer size >>> for KVM's xsave UABI, as this patch does. Let's just leave it using the core >>> kernels UABI version. >> >> Hmm, why so? >> >> As I see it, the vcpu->arch.guest_fpu structure is already exposed to >> KVM. This series doesn’t modify those structures (fpu_guest and >> fpstate), other than removing a dead field (patch 2). >> >> Both ->usersize and ->user_xfeatures fields are already exposed -- >> currently KVM just doesn’t reference them at all. >> >> All the changes introduced here are transparent to KVM. Organizing the >> initial values and wiring up guest_perm and fpstate are entirely >> internal to the x86 core, no? > > This patch adds struct vcpu_fpu_config, with new fields user_size, > user_features. Then those fields are used to configure the guest FPU, where > today it just uses fpu_user_cfg.default_features, etc. > > KVM doesn't refer to any of those fields specifically, but since they are used > to configure struct fpu_guest they become part of KVM's uABI. Today, fpu_alloc_guest_fpstate() -> __fpstate_reset() sets vcpu->arch.guest_fpu.fpstate->user_xfeatures using fpu_user_cfg.default_features. Are you really saying that switching this to guest_default_cfg.user_features would constitute a uABI change? Do you consider fpu_user_cfg.default_features to be part of the uABI or anything else?
On Mon, 2025-04-28 at 20:22 -0700, Chang S. Bae wrote: > > > > This patch adds struct vcpu_fpu_config, with new fields user_size, > > user_features. Then those fields are used to configure the guest FPU, where > > today it just uses fpu_user_cfg.default_features, etc. > > > > KVM doesn't refer to any of those fields specifically, but since they are > > used > > to configure struct fpu_guest they become part of KVM's uABI. > > Today, fpu_alloc_guest_fpstate() -> __fpstate_reset() sets > vcpu->arch.guest_fpu.fpstate->user_xfeatures using > fpu_user_cfg.default_features. > > Are you really saying that switching this to > guest_default_cfg.user_features would constitute a uABI change? I'm not saying there is a uABI change... I don't see a change in uABI. > Do you > consider fpu_user_cfg.default_features to be part of the uABI or > anything else? KVM_GET_XSAVE is part of KVM's API. It uses fields configured in struct fpu_guest. If fpu_user_cfg.default_features changes value (in the current code) it would change KVM's uABI. But I'm starting to suspect we are talking past each other. It should be simple. Two new configuration fields are added in this patch that match the existing concept and values of existing configurations fields. Per Sean, there are no plans to have them diverge. So why add them. If anyone feels strongly, I won't argue. But I think there is just miscommunication.
On 4/28/2025 8:36 PM, Edgecombe, Rick P wrote:
>
> KVM_GET_XSAVE is part of KVM's API. It uses fields configured in struct
> fpu_guest. If fpu_user_cfg.default_features changes value (in the current code)
> it would change KVM's uABI.
Not quite. The ABI reflects the XSAVE format directly. The XSAVE header
indicates which feature states are present, so while the _contents_ of
the buffer may vary depending on the feature set, the _format_ itself
remains unchanged. That doesn't constitute a uABI change.
> It should be simple. Two new configuration fields are added in this patch that
> match the existing concept and values of existing configurations fields. Per
> Sean, there are no plans to have them diverge. So why add them.
I'm fine with dropping them -- as long as the resulting code remains
clear and avoids unnecessary complexity around VCPU allocation.
Here are some of the considerations that led me to suggest them in the
first place:
* The guest-only feature model should be established in a clean and
structured way.
* The initialization logic should stay explicit -- especially to make
it clear what constitutes guest features, even when they match host
features. That naturally led to introducing a dedicated data
structure.
* Since the VCPU FPU container includes struct fpstate, it felt
appropriate to mirror relevant fields where useful.
* Including user_size and user_xfeatures made the VCPU allocation logic
more straightforward and self-contained.
And to clarify -- this addition doesn’t necessarily imply divergence
from fpu_guest_cfg. Its usage is local to setting up the guest fpstate,
and nothing more.
On Wed, 2025-04-30 at 08:01 -0700, Chang S. Bae wrote: > On 4/28/2025 8:36 PM, Edgecombe, Rick P wrote: > > > > KVM_GET_XSAVE is part of KVM's API. It uses fields configured in struct > > fpu_guest. If fpu_user_cfg.default_features changes value (in the current code) > > it would change KVM's uABI. > > Not quite. The ABI reflects the XSAVE format directly. The XSAVE header > indicates which feature states are present, so while the _contents_ of > the buffer may vary depending on the feature set, the _format_ itself > remains unchanged. That doesn't constitute a uABI change. Heh, ok sure. > > > It should be simple. Two new configuration fields are added in this patch that > > match the existing concept and values of existing configurations fields. Per > > Sean, there are no plans to have them diverge. So why add them. > > I'm fine with dropping them -- as long as the resulting code remains > clear and avoids unnecessary complexity around VCPU allocation. > > Here are some of the considerations that led me to suggest them in the > first place: > > * The guest-only feature model should be established in a clean and > structured way. > * The initialization logic should stay explicit -- especially to make > it clear what constitutes guest features, even when they match host > features. That naturally led to introducing a dedicated data > structure. > * Since the VCPU FPU container includes struct fpstate, it felt > appropriate to mirror relevant fields where useful. > * Including user_size and user_xfeatures made the VCPU allocation logic > more straightforward and self-contained. > > And to clarify -- this addition doesn’t necessarily imply divergence > from fpu_guest_cfg. Its usage is local to setting up the guest fpstate, > and nothing more. I'd like to close this out. I see there there is currently one concept of user features and size, and per Sean, KVM intends to stay consistent with the rest of the kernel - leaving it at one concept. This was new info since you suggested the fields. So why don't you propose a resolution here and we'll just go with it.
On Wed, Apr 30, 2025, Rick P Edgecombe wrote:
> On Wed, 2025-04-30 at 08:01 -0700, Chang S. Bae wrote:
> > On 4/28/2025 8:36 PM, Edgecombe, Rick P wrote:
> > >
> > > KVM_GET_XSAVE is part of KVM's API. It uses fields configured in struct
> > > fpu_guest. If fpu_user_cfg.default_features changes value (in the current code)
> > > it would change KVM's uABI.
> >
> > Not quite. The ABI reflects the XSAVE format directly. The XSAVE header
> > indicates which feature states are present, so while the _contents_ of
> > the buffer may vary depending on the feature set, the _format_ itself
> > remains unchanged. That doesn't constitute a uABI change.
>
> Heh, ok sure.
Hmm, it's a valid point that format isn't changing, and that host userspace and
guests will inevitably have different state in the XSAVE buffer.
That said, it's still an ABI change in the sense that once support for CET_S is
added, userspace can rely on KVM_{G,S}ET_XSAVE(2) to save/restore CET_S state,
and dropping that support would clearly break userspace.
> > > It should be simple. Two new configuration fields are added in this patch that
> > > match the existing concept and values of existing configurations fields. Per
> > > Sean, there are no plans to have them diverge. So why add them.
> >
> > I'm fine with dropping them -- as long as the resulting code remains
> > clear and avoids unnecessary complexity around VCPU allocation.
> >
> > Here are some of the considerations that led me to suggest them in the
> > first place:
> >
> > * The guest-only feature model should be established in a clean and
> > structured way.
> > * The initialization logic should stay explicit -- especially to make
> > it clear what constitutes guest features, even when they match host
> > features. That naturally led to introducing a dedicated data
> > structure.
> > * Since the VCPU FPU container includes struct fpstate, it felt
> > appropriate to mirror relevant fields where useful.
> > * Including user_size and user_xfeatures made the VCPU allocation logic
> > more straightforward and self-contained.
> >
> > And to clarify -- this addition doesn’t necessarily imply divergence
> > from fpu_guest_cfg. Its usage is local to setting up the guest fpstate,
> > and nothing more.
>
> I'd like to close this out. I see there there is currently one concept of user
> features and size, and per Sean, KVM intends to stay consistent with the rest of
> the kernel - leaving it at one concept. This was new info since you suggested
> the fields. So why don't you propose a resolution here and we'll just go with
> it.
I'm not totally opposed to diverging, but IMO there needs to be strong motivation
to do so. Sharing code and ABI between KVM and the kernel is mutually beneficial
on multiple fronts.
Unless I've missed something, KVM will need to provide save/restore support via
MSRs for all CET_S state anyways, so I don't see any motivation whatsoever in this
particular case.
On 4/30/2025 9:20 AM, Sean Christopherson wrote:
> On Wed, Apr 30, 2025, Rick P Edgecombe wrote:
>> On Wed, 2025-04-30 at 08:01 -0700, Chang S. Bae wrote:
>>> On 4/28/2025 8:36 PM, Edgecombe, Rick P wrote:
>>>>
>>>> KVM_GET_XSAVE is part of KVM's API. It uses fields configured in struct
>>>> fpu_guest. If fpu_user_cfg.default_features changes value (in the current code)
>>>> it would change KVM's uABI.
>>>
>>> Not quite. The ABI reflects the XSAVE format directly. The XSAVE header
>>> indicates which feature states are present, so while the _contents_ of
>>> the buffer may vary depending on the feature set, the _format_ itself
>>> remains unchanged. That doesn't constitute a uABI change.
>>
>> Heh, ok sure.
>
> Hmm, it's a valid point that format isn't changing, and that host userspace and
> guests will inevitably have different state in the XSAVE buffer.
>
> That said, it's still an ABI change in the sense that once support for CET_S is
> added, userspace can rely on KVM_{G,S}ET_XSAVE(2) to save/restore CET_S state,
> and dropping that support would clearly break userspace.
I think my comment was specifically in response to this statement "if
fpu_user_cfg.default_features changes value," which I took to mean
changes limited to user features.
Diverging guest user features wasn’t something I intended here --
although I briefly considered MPX but dropped it due to complexity:
https://lore.kernel.org/lkml/2ac2d1e7-d04b-443a-8fff-7aa3f436dcce@intel.com/
At this point, I think the reaction here speaks for itself. If adding
those two fields leads to confusion or demands fatty code comment, the
net benefit goes negative.
So yes, overall, let's just reference fpu_guest_cfg directly as-is.
Thanks,
Chang
On Tue, Apr 29, 2025 at 11:36:40AM +0800, Edgecombe, Rick P wrote: >On Mon, 2025-04-28 at 20:22 -0700, Chang S. Bae wrote: >> > >> > This patch adds struct vcpu_fpu_config, with new fields user_size, >> > user_features. Then those fields are used to configure the guest FPU, where >> > today it just uses fpu_user_cfg.default_features, etc. >> > >> > KVM doesn't refer to any of those fields specifically, but since they are >> > used >> > to configure struct fpu_guest they become part of KVM's uABI. >> >> Today, fpu_alloc_guest_fpstate() -> __fpstate_reset() sets >> vcpu->arch.guest_fpu.fpstate->user_xfeatures using >> fpu_user_cfg.default_features. >> >> Are you really saying that switching this to >> guest_default_cfg.user_features would constitute a uABI change? > >I'm not saying there is a uABI change... I don't see a change in uABI. Yes. We all agree that this series has no uABI change. > >> Do you >> consider fpu_user_cfg.default_features to be part of the uABI or >> anything else? > >KVM_GET_XSAVE is part of KVM's API. It uses fields configured in struct >fpu_guest. If fpu_user_cfg.default_features changes value (in the current code) >it would change KVM's uABI. But I'm starting to suspect we are talking past each >other. > >It should be simple. Two new configuration fields are added in this patch that Yes. it is a minor issue. >match the existing concept and values of existing configurations fields. Per >Sean, there are no plans to have them diverge. So why add them. If anyone feels >strongly, I won't argue. But I think there is just miscommunication. Ok. I will drop vcpu_fpu_config.user*.
On 4/25/2025 4:48 PM, Sean Christopherson wrote: > CET_S probably shouldn't be in XSAVE ABI, because that would technically leak > kernel state to userspace for the non-KVM use case. I assume the kernel has > bigger problems if CET_S is somehow tied to a userspace task. +1000
>> Hmm, interesting. I guess there are two things.
>> 1. Should CET_S be part of KVM_GET_XSAVE instead of via MSRs ioctls? It never
>> was in the KVM CET patches.
>> 2. A feature mask far away in the FPU code controls KVM's xsave ABI.
>>
>> For (1), does any userspace depend on their not being supervisor features? (i.e.
>> tries to restore the guest FPU for emulation or something). There probably are
>> some advantages to keeping supervisor features out of it, or at least a separate
>> ioctl.
>
>CET_S probably shouldn't be in XSAVE ABI, because that would technically leak
>kernel state to userspace for the non-KVM use case.
ok. thanks for the clarification.
>I assume the kernel has
>bigger problems if CET_S is somehow tied to a userspace task.
To be clear, CET_S here refers to the CET supervisor state, which includes SSP
pointers for privilege levels 0 through 2. The IA32_S_CET MSR is not part of
that state.
>
>For KVM, it's just the one MSR, and KVM needs to support save/restore of that MSR
>no matter what, so supporting it via XSAVE would be more work, a bit sketchy, and
>create yet another way for userspace to do weird things when saving/restoring vCPU
>state.
Agreed. One more issue of including CET_S into KVM_GET/SET_XSAVE{2} is:
XSAVE UABI buffers adhere to the standard format defined by the SDM, which
never includes supervisor states. Attempting to incorporate supervisor states
into UABI buffers would lead to many issues, such as deviating from the
standard format and the need to define offsets for each supervisor state.
On Mon, Apr 28, 2025, Chao Gao wrote:
> >I assume the kernel has bigger problems if CET_S is somehow tied to a
> >userspace task.
>
> To be clear, CET_S here refers to the CET supervisor state, which includes SSP
> pointers for privilege levels 0 through 2. The IA32_S_CET MSR is not part of
> that state.
>
> >
> >For KVM, it's just the one MSR, and KVM needs to support save/restore of that MSR
> >no matter what,
Oh, it's not just one MSR. I was indeed thinking this was just IA32_S_CET. But
lucky for me, the statement holds for SSP0-SS2.
> so supporting it via XSAVE would be more work, a bit sketchy, and
> >create yet another way for userspace to do weird things when saving/restoring vCPU
> >state.
>
> Agreed. One more issue of including CET_S into KVM_GET/SET_XSAVE{2} is:
>
> XSAVE UABI buffers adhere to the standard format defined by the SDM, which
> never includes supervisor states. Attempting to incorporate supervisor states
> into UABI buffers would lead to many issues, such as deviating from the
> standard format and the need to define offsets for each supervisor state.
On 4/27/2025 8:26 PM, Chao Gao wrote:
>> For KVM, it's just the one MSR, and KVM needs to support save/restore of that MSR
>> no matter what, so supporting it via XSAVE would be more work, a bit sketchy, and
>> create yet another way for userspace to do weird things when saving/restoring vCPU
>> state.
> Agreed. One more issue of including CET_S into KVM_GET/SET_XSAVE{2} is:
>
> XSAVE UABI buffers adhere to the standard format defined by the SDM, which
> never includes supervisor states. Attempting to incorporate supervisor states
> into UABI buffers would lead to many issues, such as deviating from the
> standard format and the need to define offsets for each supervisor state.
Good point.
FRED RSPs are in the supervisor state and are not defined within the
XSAVE area. Their save/restore operations need to be explicitly added
one by one.
Would it be sensible to introduce a KVM supervisor MSR state that
includes new MSRs?
This approach is similar to what KVM has with its CPUID API, allowing
batch MSR set/get operations and improving MSR code sharing. Since the
FRED state is entirely defined in MSRs (except for CR4.FRED), this
should simplify the context save and restore for FRED.
Thanks!
Xin
© 2016 - 2025 Red Hat, Inc.