If a feature is not advertised in the guest's CPUID, prevent L1 from
intercepting the unsupported instructions by clearing the corresponding
intercept in KVM's cached vmcb12.

When an L2 guest executes an instruction that is not advertised to L1,
L0 is expected to inject a #UD exception. However, the nested SVM exit
handler first checks whether the instruction's intercept is set in
vmcb12, and if so, synthesizes an exit from L2 to L1 instead of a #UD
exception. If a feature is not advertised, the L1 intercept should be
ignored.

When creating KVM's cached vmcb12, sanitize the intercepts for
instructions that are not advertised in the guest CPUID. This
effectively ignores the L1 intercept during nested VM-Exit handling.

Signed-off-by: Kevin Cheng <chengkev@google.com>
---
v1 -> v2:
- Removed nested_intercept_mask which was a bit mask for nested
intercepts to ignore.
- Now sanitizing intercepts every time cached vmcb12 is created
- New wrappers for vmcb set/clear intercept functions
- Added macro functions for vmcb12 intercept sanitizing
- All changes suggested by Sean. Thanks!
- https://lore.kernel.org/all/20251205070630.4013452-1-chengkev@google.com/
arch/x86/kvm/svm/nested.c | 19 +++++++++++++++++++
arch/x86/kvm/svm/svm.h | 35 +++++++++++++++++++++++++++--------
2 files changed, 46 insertions(+), 8 deletions(-)
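
For context (not part of the patch): the decision to forward an exit to
L1 is keyed off the cached vmcb12 intercepts, roughly as in the sketch
below. This is a paraphrased, abridged version of nested_svm_intercept();
the real handler special-cases a number of exit codes.

/*
 * Sketch only -- paraphrased from nested_svm_intercept(), not the exact
 * KVM code.  With the intercept cleared from the cached vmcb12, the
 * default case no longer reflects the exit to L1, so L0's normal
 * handling (e.g. injecting #UD) runs instead.
 */
static int nested_svm_intercept_sketch(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	/* ... special-cased exit codes elided ... */
	default:
		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
			vmexit = NESTED_EXIT_DONE;	/* reflect to L1 */
		break;
	}

	return vmexit;
}
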
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index c81005b245222..5ffc12a315ec7 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -403,6 +403,19 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu)
return __nested_vmcb_check_controls(vcpu, ctl);
}
+/*
+ * If a feature is not advertised to L1, clear the corresponding vmcb12
+ * intercept.
+ */
+#define __nested_svm_sanitize_intercept(__vcpu, __control, fname, iname) \
+do { \
+ if (!guest_cpu_cap_has(__vcpu, X86_FEATURE_##fname)) \
+ vmcb12_clr_intercept(__control, INTERCEPT_##iname); \
+} while (0)
+
+#define nested_svm_sanitize_intercept(__vcpu, __control, name) \
+ __nested_svm_sanitize_intercept(__vcpu, __control, name, name)
+
static
void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
struct vmcb_ctrl_area_cached *to,
@@ -413,6 +426,12 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
for (i = 0; i < MAX_INTERCEPT; i++)
to->intercepts[i] = from->intercepts[i];
+ __nested_svm_sanitize_intercept(vcpu, to, XSAVE, XSETBV);
+ nested_svm_sanitize_intercept(vcpu, to, INVPCID);
+ nested_svm_sanitize_intercept(vcpu, to, RDTSCP);
+ nested_svm_sanitize_intercept(vcpu, to, SKINIT);
+ nested_svm_sanitize_intercept(vcpu, to, RDPRU);
+
to->iopm_base_pa = from->iopm_base_pa;
to->msrpm_base_pa = from->msrpm_base_pa;
to->tsc_offset = from->tsc_offset;
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 9e151dbdef25d..7a8c92c4de2fb 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -434,28 +434,47 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
*/
#define SVM_REGS_LAZY_LOAD_SET (1 << VCPU_EXREG_PDPTR)
-static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
+static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit)
{
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
- __set_bit(bit, (unsigned long *)&control->intercepts);
+ __set_bit(bit, intercepts);
}
-static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
+static inline void __vmcb_clr_intercept(unsigned long *intercepts, u32 bit)
{
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
- __clear_bit(bit, (unsigned long *)&control->intercepts);
+ __clear_bit(bit, intercepts);
}
-static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
+static inline bool __vmcb_is_intercept(unsigned long *intercepts, u32 bit)
{
WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
- return test_bit(bit, (unsigned long *)&control->intercepts);
+ return test_bit(bit, intercepts);
+}
+
+static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
+{
+ __vmcb_set_intercept((unsigned long *)&control->intercepts, bit);
+}
+
+static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
+{
+ __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
+}
+
+static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
+{
+ return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
+}
+
+static inline void vmcb12_clr_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
+{
+ __vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
}
static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
{
- WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
- return test_bit(bit, (unsigned long *)&control->intercepts);
+ return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
}
static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
--
2.52.0.239.gd5f0c6e74e-goog
On Mon, Dec 15, 2025 at 04:07:10PM +0000, Kevin Cheng wrote:
> If a feature is not advertised in the guest's CPUID, prevent L1 from
> intercepting the unsupported instructions by clearing the corresponding
> intercept in KVM's cached vmcb12.
>
> When an L2 guest executes an instruction that is not advertised to L1,
> we expect a #UD exception to be injected by L0. However, the nested svm
> exit handler first checks if the instruction intercept is set in vmcb12,
> and if so, synthesizes an exit from L2 to L1 instead of a #UD exception.
> If a feature is not advertised, the L1 intercept should be ignored.
>
> While creating KVM's cached vmcb12, sanitize the intercepts for
> instructions that are not advertised in the guest CPUID. This
> effectively ignores the L1 intercept on nested vm exit handling.
Nit: It also ignores the L1 intercept when computing the intercepts in
VMCB02, so if L0 (for some reason) does not intercept the instruction,
KVM won't intercept it at all.
I don't think this should happen because KVM should always intercept
unsupported instructions to inject a #UD, unless they are not supported
by HW, in which case I believe the HW will inject the #UD for us.
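For reference, the vmcb02 intercepts are computed roughly like this (an
abridged, paraphrased sketch of recalc_intercepts(); the real function
also special-cases CR and VINTR intercepts):

/* Sketch only -- abridged from recalc_intercepts() in svm.c. */
static void recalc_intercepts_sketch(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c = &svm->vmcb->control;	   /* vmcb02 */
	struct vmcb_control_area *h = &svm->vmcb01.ptr->control;  /* vmcb01: L0's intercepts */
	struct vmcb_ctrl_area_cached *g = &svm->nested.ctl;	   /* cached (sanitized) vmcb12 */
	unsigned int i;

	if (!is_guest_mode(&svm->vcpu))
		return;

	/* Start from L0's intercepts, then OR in L1's (sanitized) ones. */
	for (i = 0; i < MAX_INTERCEPT; i++)
		c->intercepts[i] = h->intercepts[i] | g->intercepts[i];
}

So once the bit is cleared from the cached vmcb12, only vmcb01 decides
whether the instruction is intercepted at all.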
>
> Signed-off-by: Kevin Cheng <chengkev@google.com>
Maybe also this since Sean contributed code to the patch in his last
review?
Co-developed-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
Otherwise looks good to me, FWIW:
Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>
On Mon, Dec 15, 2025 at 1:51 PM Yosry Ahmed <yosry.ahmed@linux.dev> wrote:
>
> On Mon, Dec 15, 2025 at 04:07:10PM +0000, Kevin Cheng wrote:
> > If a feature is not advertised in the guest's CPUID, prevent L1 from
> > intercepting the unsupported instructions by clearing the corresponding
> > intercept in KVM's cached vmcb12.
> >
> > When an L2 guest executes an instruction that is not advertised to L1,
> > we expect a #UD exception to be injected by L0. However, the nested svm
> > exit handler first checks if the instruction intercept is set in vmcb12,
> > and if so, synthesizes an exit from L2 to L1 instead of a #UD exception.
> > If a feature is not advertised, the L1 intercept should be ignored.
> >
> > While creating KVM's cached vmcb12, sanitize the intercepts for
> > instructions that are not advertised in the guest CPUID. This
> > effectively ignores the L1 intercept on nested vm exit handling.
>
> Nit: It also ignores the L1 intercept when computing the intercepts in
> VMCB02, so if L0 (for some reason) does not intercept the instruction,
> KVM won't intercept it at all.
>
> I don't think this should happen because KVM should always intercept
> unsupported instructions to inject a #UD, unless they are not supported
> by HW, in which case I believe the HW will inject the #UD for us.
>
> >
> > Signed-off-by: Kevin Cheng <chengkev@google.com>
>
> Maybe also this since Sean contributed code to the patch in his last
> review?
>
> Co-developed-by: Sean Christopherson <seanjc@google.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
>
I promise I didn't mean to leave that out haha. Sorry Sean! Done.
> Otherwise looks good to me, FWIW:
> Reviewed-by: Yosry Ahmed <yosry.ahmed@linux.dev>