Explicitly finalize kvm_cpu_caps as part of each vendor's setup flow to
fix a bug where clearing SHSTK and IBT due to lack of CET XFEATURE support
makes kvm-intel.ko unloadable when nested=1. The late clearing results in
nested_vmx_setup_{entry,exit}_ctls() clearing VM_{ENTRY,EXIT}_LOAD_CET_STATE
when nested_vmx_setup_ctls_msrs() runs during the CPU compatibility checks,
ultimately leading to a mismatched VMCS config due to the reference config
having the CET bits set, but every CPU's "local" config having the bits
cleared.
Note, kvm_caps.supported_{xcr0,xss} are unconditionally initialized by
kvm_x86_vendor_init(), before calling into vendor code, and not referenced
between ops->hardware_setup() and their current/old location.
Fixes: 69cc3e886582 ("KVM: x86: Add XSS support for CET_KERNEL and CET_USER")
Cc: stable@vger.kernel.org
Cc: Mathias Krause <minipli@grsecurity.net>
Cc: John Allen <john.allen@amd.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Cc: Chao Gao <chao.gao@intel.com>
Cc: Binbin Wu <binbin.wu@linux.intel.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/cpuid.c | 21 +++++++++++++++++++--
arch/x86/kvm/cpuid.h | 3 ++-
arch/x86/kvm/svm/svm.c | 4 +++-
arch/x86/kvm/vmx/vmx.c | 4 +++-
arch/x86/kvm/x86.c | 14 --------------
arch/x86/kvm/x86.h | 2 ++
6 files changed, 29 insertions(+), 19 deletions(-)
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 575244af9c9f..267e59b405c1 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -826,7 +826,7 @@ do { \
/* DS is defined by ptrace-abi.h on 32-bit builds. */
#undef DS
-void kvm_set_cpu_caps(void)
+void kvm_initialize_cpu_caps(void)
{
memset(kvm_cpu_caps, 0, sizeof(kvm_cpu_caps));
@@ -1289,7 +1289,24 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_RDPID);
}
}
-EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_set_cpu_caps);
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_initialize_cpu_caps);
+
+void kvm_finalize_cpu_caps(void)
+{
+ if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
+ kvm_caps.supported_xss = 0;
+
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+
+ if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+ }
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_finalize_cpu_caps);
#undef F
#undef SCATTERED_F
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index d3f5ae15a7ca..3b0b4b1adb97 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -8,7 +8,8 @@
#include <uapi/asm/kvm_para.h>
extern u32 kvm_cpu_caps[NR_KVM_CPU_CAPS] __read_mostly;
-void kvm_set_cpu_caps(void);
+void kvm_initialize_cpu_caps(void);
+void kvm_finalize_cpu_caps(void);
void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu);
struct kvm_cpuid_entry2 *kvm_find_cpuid_entry2(struct kvm_cpuid_entry2 *entries,
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7803d2781144..0c23fcaedcc5 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5305,7 +5305,7 @@ static __init void svm_adjust_mmio_mask(void)
static __init void svm_set_cpu_caps(void)
{
- kvm_set_cpu_caps();
+ kvm_initialize_cpu_caps();
kvm_caps.supported_perf_cap = 0;
@@ -5387,6 +5387,8 @@ static __init void svm_set_cpu_caps(void)
*/
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
+
+ kvm_finalize_cpu_caps();
}
static __init int svm_hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 27acafd03381..7d373e32ea9c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8173,7 +8173,7 @@ static __init u64 vmx_get_perf_capabilities(void)
static __init void vmx_set_cpu_caps(void)
{
- kvm_set_cpu_caps();
+ kvm_initialize_cpu_caps();
/* CPUID 0x1 */
if (nested)
@@ -8230,6 +8230,8 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
}
+
+ kvm_finalize_cpu_caps();
}
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8acfdfc583a1..36385e6aebfa 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -220,7 +220,6 @@ static DEFINE_PER_CPU(struct kvm_user_return_msrs, user_return_msrs);
| XFEATURE_MASK_BNDCSR | XFEATURE_MASK_AVX512 \
| XFEATURE_MASK_PKRU | XFEATURE_MASK_XTILE)
-#define XFEATURE_MASK_CET_ALL (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)
/*
* Note, KVM supports exposing PT to the guest, but does not support context
* switching PT via XSTATE (KVM's PT virtualization relies on perf; swapping
@@ -10138,19 +10137,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!tdp_enabled)
kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
- if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
- kvm_caps.supported_xss = 0;
-
- if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
- !kvm_cpu_cap_has(X86_FEATURE_IBT))
- kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
-
- if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
- kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
- kvm_cpu_cap_clear(X86_FEATURE_IBT);
- kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
- }
-
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 70e81f008030..9edfac5d5ffb 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -483,6 +483,8 @@ extern struct kvm_host_values kvm_host;
extern bool enable_pmu;
extern bool enable_mediated_pmu;
+#define XFEATURE_MASK_CET_ALL (XFEATURE_MASK_CET_USER | XFEATURE_MASK_CET_KERNEL)
+
/*
* Get a filtered version of KVM's supported XCR0 that strips out dynamic
* features for which the current process doesn't (yet) have permission to use.
--
2.52.0.457.g6b5491de43-goog
On 1/24/2026 6:15 AM, Sean Christopherson wrote:
...
> +void kvm_finalize_cpu_caps(void)
It also finalizes the kvm_caps, at least kvm_caps.supported_xss, which
seems not consistent with the name.
Even more, just look at the function body, the name
"kvm_finalize_supported_xss" seems to fit better while clearing SHSTK
and IBT just the side effect of the finalized kvm_caps.supported_xss.
> +{
> + if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
> + kvm_caps.supported_xss = 0;
> +
> + if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
> + !kvm_cpu_cap_has(X86_FEATURE_IBT))
> + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
> +
> + if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
> + kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
> + kvm_cpu_cap_clear(X86_FEATURE_IBT);
> + kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
> + }
> +}
On Tue, Jan 27, 2026, Xiaoyao Li wrote:
> On 1/24/2026 6:15 AM, Sean Christopherson wrote:
> ...
> > +void kvm_finalize_cpu_caps(void)
>
> It also finalizes the kvm_caps,
No, it just happens to update supported_xss as well.
> at least kvm_caps.supported_xss, which seems not consistent with the name.
I agree, but I don't see a clearly better option. E.g. kvm_finalize_cpu_caps()
could be pedantic and only touch cpu_caps:
if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES) ||
(kvm_host.xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
}
but then we have duplicate logic, and the connection between supported_xss and
SHSTK/IBT is lost.
The only viable alternative I can think of would be to provide a dedicated
kvm_set_xss_caps() and then do:
kvm_set_xss_caps();
kvm_finalize_cpu_caps();
where kvm_finalize_cpu_caps() just clears kvm_is_configuring_cpu_caps. Or I
suppose it could be:
kvm_set_xss_caps();
kvm_is_configuring_cpu_caps = false;
though I think I'd prefer to keep kvm_finalize_cpu_caps() and make it an inline.
Hmm, the more I look at that option, the more I like it? It's kinda silly,
especially if we end up with a whole pile of helpers, e.g.
kvm_set_xss_caps();
kvm_set_blah_caps();
kvm_set_loblaw_caps();
kvm_finalize_cpu_caps();
But at least for now, I definitely don't hate it.
> Even more, just look at the function body, the name
> "kvm_finalize_supported_xss" seems to fit better while clearing SHSTK and
> IBT just the side effect of the finalized kvm_caps.supported_xss.
No, I definitely want kvm_finalize_cpu_caps() somewhere, so that we end up with
kvm_initialize_cpu_caps() + kvm_finalize_cpu_caps(). The function happens to
only modify CET caps and thus only touches supported_xss as a side effect, but
the intent is very much that it will serve as the one and only place where KVM
makes "final" adjustments that are common to VMX and SVM.
But as above, I'm not opposed to having both. And it does provide a leaner diff
for the stable@ fix (though that's largely irrelevant since only 6.18 needs the
fix).
So this for patch 1 (not yet tested)?
From: Sean Christopherson <seanjc@google.com>
Date: Tue, 27 Jan 2026 08:14:27 -0800
Subject: [PATCH] KVM: x86: Configure supported XSS from {svm,vmx}_set_cpu_caps()
Explicitly configure KVM's supported XSS as part of each vendor's setup
flow to fix a bug where clearing SHSTK and IBT in kvm_cpu_caps, e.g. due
to lack of CET XFEATURE support, prevents kvm-intel.ko from loading when
nested VMX is enabled, i.e. when nested=1. The late clearing results in
nested_vmx_setup_{entry,exit}_ctls() clearing VM_{ENTRY,EXIT}_LOAD_CET_STATE
when nested_vmx_setup_ctls_msrs() runs during the CPU compatibility checks,
ultimately leading to a mismatched VMCS config due to the reference config
having the CET bits set, but every CPU's "local" config having the bits
cleared.
Note, kvm_caps.supported_{xcr0,xss} are unconditionally initialized by
kvm_x86_vendor_init(), before calling into vendor code, and not referenced
between ops->hardware_setup() and their current/old location.
Fixes: 69cc3e886582 ("KVM: x86: Add XSS support for CET_KERNEL and CET_USER")
Cc: stable@vger.kernel.org
Cc: Mathias Krause <minipli@grsecurity.net>
Cc: John Allen <john.allen@amd.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Cc: Chao Gao <chao.gao@intel.com>
Cc: Binbin Wu <binbin.wu@linux.intel.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/kvm/svm/svm.c | 2 ++
arch/x86/kvm/vmx/vmx.c | 2 ++
arch/x86/kvm/x86.c | 30 +++++++++++++++++-------------
arch/x86/kvm/x86.h | 2 ++
4 files changed, 23 insertions(+), 13 deletions(-)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 7803d2781144..c00a696dacfc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -5387,6 +5387,8 @@ static __init void svm_set_cpu_caps(void)
*/
kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
kvm_cpu_cap_clear(X86_FEATURE_MSR_IMM);
+
+ kvm_setup_xss_caps();
}
static __init int svm_hardware_setup(void)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 27acafd03381..9f85c3829890 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8230,6 +8230,8 @@ static __init void vmx_set_cpu_caps(void)
kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
kvm_cpu_cap_clear(X86_FEATURE_IBT);
}
+
+ kvm_setup_xss_caps();
}
static bool vmx_is_io_intercepted(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 8acfdfc583a1..cac1d6a67b49 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9965,6 +9965,23 @@ static struct notifier_block pvclock_gtod_notifier = {
};
#endif
+void kvm_setup_xss_caps(void)
+{
+ if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
+ kvm_caps.supported_xss = 0;
+
+ if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
+ !kvm_cpu_cap_has(X86_FEATURE_IBT))
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+
+ if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
+ kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
+ kvm_cpu_cap_clear(X86_FEATURE_IBT);
+ kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
+ }
+}
+EXPORT_SYMBOL_FOR_KVM_INTERNAL(kvm_setup_xss_caps);
+
static inline void kvm_ops_update(struct kvm_x86_init_ops *ops)
{
memcpy(&kvm_x86_ops, ops->runtime_ops, sizeof(kvm_x86_ops));
@@ -10138,19 +10155,6 @@ int kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
if (!tdp_enabled)
kvm_caps.supported_quirks &= ~KVM_X86_QUIRK_IGNORE_GUEST_PAT;
- if (!kvm_cpu_cap_has(X86_FEATURE_XSAVES))
- kvm_caps.supported_xss = 0;
-
- if (!kvm_cpu_cap_has(X86_FEATURE_SHSTK) &&
- !kvm_cpu_cap_has(X86_FEATURE_IBT))
- kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
-
- if ((kvm_caps.supported_xss & XFEATURE_MASK_CET_ALL) != XFEATURE_MASK_CET_ALL) {
- kvm_cpu_cap_clear(X86_FEATURE_SHSTK);
- kvm_cpu_cap_clear(X86_FEATURE_IBT);
- kvm_caps.supported_xss &= ~XFEATURE_MASK_CET_ALL;
- }
-
if (kvm_caps.has_tsc_control) {
/*
* Make sure the user can only configure tsc_khz values that
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 70e81f008030..94d4f07aaaa0 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -483,6 +483,8 @@ extern struct kvm_host_values kvm_host;
extern bool enable_pmu;
extern bool enable_mediated_pmu;
+void kvm_setup_xss_caps(void);
+
/*
* Get a filtered version of KVM's supported XCR0 that strips out dynamic
* features for which the current process doesn't (yet) have permission to use.
base-commit: e81f7c908e1664233974b9f20beead78cde6343a
--
On Fri, Jan 23, 2026 at 02:15:40PM -0800, Sean Christopherson wrote:
>Explicitly finalize kvm_cpu_caps as part of each vendor's setup flow to
>fix a bug where clearing SHSTK and IBT due to lack of CET XFEATURE support
>makes kvm-intel.ko unloadable when nested=1. The late clearing results in
>nested_vmx_setup_{entry,exit}_ctls() clearing VM_{ENTRY,EXIT}_LOAD_CET_STATE
>when nested_vmx_setup_ctls_msrs() runs during the CPU compatibility checks,
>ultimately leading to a mismatched VMCS config due to the reference config
>having the CET bits set, but every CPU's "local" config having the bits
>cleared.
>
>Note, kvm_caps.supported_{xcr0,xss} are unconditionally initialized by
>kvm_x86_vendor_init(), before calling into vendor code, and not referenced
>between ops->hardware_setup() and their current/old location.
>
>Fixes: 69cc3e886582 ("KVM: x86: Add XSS support for CET_KERNEL and CET_USER")
>Cc: stable@vger.kernel.org
>Cc: Mathias Krause <minipli@grsecurity.net>
>Cc: John Allen <john.allen@amd.com>
>Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
>Cc: Chao Gao <chao.gao@intel.com>
>Cc: Binbin Wu <binbin.wu@linux.intel.com>
>Cc: Xiaoyao Li <xiaoyao.li@intel.com>
>Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Chao Gao <chao.gao@intel.com>
© 2016 - 2026 Red Hat, Inc.