Add VMX exit handlers for SEAMCALL and TDCALL to inject a #UD if a non-TD
guest attempts to execute SEAMCALL or TDCALL. Neither SEAMCALL nor TDCALL
is gated by any software enablement other than VMXON, and so will generate
a VM-Exit instead of e.g. a native #UD when executed from the guest kernel.
Note! No unprivileged DoS of the L1 kernel is possible as TDCALL and
SEAMCALL #GP at CPL > 0, and the CPL check is performed prior to the VMX
non-root (VM-Exit) check, i.e. userspace can't crash the VM. And for a
nested guest, KVM forwards unknown exits to L1, i.e. an L2 kernel can
crash itself, but not L1.
Note #2! The Intel® Trust Domain CPU Architectural Extensions spec's
pseudocode shows the CPL > 0 check for SEAMCALL coming _after_ the VM-Exit,
but that appears to be a documentation bug (likely because the CPL > 0
check was incorrectly bundled with other lower-priority #GP checks).
Testing on SPR and EMR shows that the CPL > 0 check is performed before
the VMX non-root check, i.e. SEAMCALL #GPs when executed in usermode.
Note #3! The aforementioned Trust Domain spec uses confusing pseudocode
that says that SEAMCALL will #UD if executed "inSEAM", but "inSEAM"
specifically means in SEAM Root Mode, i.e. in the TDX-Module. The long-
form description explicitly states that SEAMCALL generates an exit when
executed in "SEAM VMX non-root operation". But that's a moot point as the
TDX-Module injects #UD if the guest attempts to execute SEAMCALL, as
documented in the "Unconditionally Blocked Instructions" section of the
TDX-Module base specification.
Cc: stable@vger.kernel.org
Cc: Kai Huang <kai.huang@intel.com>
Cc: Xiaoyao Li <xiaoyao.li@intel.com>
Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: Binbin Wu <binbin.wu@linux.intel.com>
Signed-off-by: Sean Christopherson <seanjc@google.com>
---
arch/x86/include/uapi/asm/vmx.h | 1 +
arch/x86/kvm/vmx/nested.c | 8 ++++++++
arch/x86/kvm/vmx/vmx.c | 8 ++++++++
3 files changed, 17 insertions(+)
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 9792e329343e..1baa86dfe029 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -93,6 +93,7 @@
#define EXIT_REASON_TPAUSE 68
#define EXIT_REASON_BUS_LOCK 74
#define EXIT_REASON_NOTIFY 75
+#define EXIT_REASON_SEAMCALL 76
#define EXIT_REASON_TDCALL 77
#define EXIT_REASON_MSR_READ_IMM 84
#define EXIT_REASON_MSR_WRITE_IMM 85
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 76271962cb70..bcea087b642f 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
case EXIT_REASON_NOTIFY:
/* Notify VM exit is not exposed to L1 */
return false;
+ case EXIT_REASON_SEAMCALL:
+ case EXIT_REASON_TDCALL:
+ /*
+ * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't
+ * virtualized by KVM for L1 hypervisors, i.e. L1 should
+ * never want or expect such an exit.
+ */
+ return false;
default:
return true;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 546272a5d34d..d1b34b7ca4a3 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6033,6 +6033,12 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
return 1;
}
+static int handle_tdx_instruction(struct kvm_vcpu *vcpu)
+{
+ kvm_queue_exception(vcpu, UD_VECTOR);
+ return 1;
+}
+
#ifndef CONFIG_X86_SGX_KVM
static int handle_encls(struct kvm_vcpu *vcpu)
{
@@ -6158,6 +6164,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_ENCLS] = handle_encls,
[EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit,
[EXIT_REASON_NOTIFY] = handle_notify,
+ [EXIT_REASON_SEAMCALL] = handle_tdx_instruction,
+ [EXIT_REASON_TDCALL] = handle_tdx_instruction,
[EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm,
[EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm,
};
--
2.51.0.858.gf9c4a03a3a-goog
On Thu, 2025-10-16 at 11:21 -0700, Sean Christopherson wrote:
> Add VMX exit handlers for SEAMCALL and TDCALL to inject a #UD if a non-TD
> guest attempts to execute SEAMCALL or TDCALL. Neither SEAMCALL nor TDCALL
> is gated by any software enablement other than VMXON, and so will generate
> a VM-Exit instead of e.g. a native #UD when executed from the guest kernel.
>
> Note! No unprivileged DoS of the L1 kernel is possible as TDCALL and
> SEAMCALL #GP at CPL > 0, and the CPL check is performed prior to the VMX
> non-root (VM-Exit) check, i.e. userspace can't crash the VM. And for a
> nested guest, KVM forwards unknown exits to L1, i.e. an L2 kernel can
> crash itself, but not L1.
>
> Note #2! The Intel® Trust Domain CPU Architectural Extensions spec's
> pseudocode shows the CPL > 0 check for SEAMCALL coming _after_ the VM-Exit,
> but that appears to be a documentation bug (likely because the CPL > 0
> check was incorrectly bundled with other lower-priority #GP checks).
> Testing on SPR and EMR shows that the CPL > 0 check is performed before
> the VMX non-root check, i.e. SEAMCALL #GPs when executed in usermode.
>
> Note #3! The aforementioned Trust Domain spec uses confusing pseudocode
> that says that SEAMCALL will #UD if executed "inSEAM", but "inSEAM"
> specifically means in SEAM Root Mode, i.e. in the TDX-Module. The long-
> form description explicitly states that SEAMCALL generates an exit when
> executed in "SEAM VMX non-root operation". But that's a moot point as the
> TDX-Module injects #UD if the guest attempts to execute SEAMCALL, as
> documented in the "Unconditionally Blocked Instructions" section of the
> TDX-Module base specification.
>
> Cc: stable@vger.kernel.org
> Cc: Kai Huang <kai.huang@intel.com>
> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
> Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Binbin Wu <binbin.wu@linux.intel.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
>
Reviewed-by: Kai Huang <kai.huang@intel.com>
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
> case EXIT_REASON_NOTIFY:
> /* Notify VM exit is not exposed to L1 */
> return false;
> + case EXIT_REASON_SEAMCALL:
> + case EXIT_REASON_TDCALL:
> + /*
> + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't
> + * virtualized by KVM for L1 hypervisors, i.e. L1 should
> + * never want or expect such an exit.
> + */
> + return false;

Sorry for commenting late.

I think from emulating hardware behaviour's perspective, if L1 doesn't
support TDX (obviously true), SEAMCALL/TDCALL in L2 should cause VMEXIT to
L1. In other words, L1 is expecting a VMEXIT in such case.

Whether L1 can handle such VMEXIT is another story -- it may inject a #UD to
L2 or may not (similar to the current upstream KVM), but it is L1's
responsibility.

So I think while this patch certainly honors the correct behaviour for L2,
it doesn't honor for L1. But I think ultimately L1 should be the one who
is responsible for emulating hardware behaviour for L2.

E.g., assuming we have a KVM selftest in L1 to test SEAMCALL/TDCALL in
normal VMX L2. L1 should be able to catch its own bug when such VMEXIT
isn't handled correctly. But with this patch, L1 will never be able to
catch this IIUC.
On Fri, Oct 17, 2025, Kai Huang wrote:
>
> > --- a/arch/x86/kvm/vmx/nested.c
> > +++ b/arch/x86/kvm/vmx/nested.c
> > @@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
> > case EXIT_REASON_NOTIFY:
> > /* Notify VM exit is not exposed to L1 */
> > return false;
> > + case EXIT_REASON_SEAMCALL:
> > + case EXIT_REASON_TDCALL:
> > + /*
> > + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't
> > + * virtualized by KVM for L1 hypervisors, i.e. L1 should
> > + * never want or expect such an exit.
> > + */
> > + return false;
>
> Sorry for commenting late.
>
> I think from emulating hardware behaviour's perspective, if L1 doesn't
> support TDX (obviously true), SEAMCALL/TDCALL in L2 should cause VMEXIT to
> L1. In other words, L1 is expecting a VMEXIT in such case.
No, because from L1's perspective, the opcodes map to undefined instructions and
thus should #UD in L2. There's no super explicit enumeration, but IMO it's fair
to say that for L1 to think the instructions exist, it would need to observe
IA32_SEAMRR_PHYS_{BASE,MASK} for SEAMCALL, and MSR_IA32_MKTME_KEYID_PARTITIONING
as well for TDCALL. KVM doesn't emulate any of those instructions, and so L1
should never expect SEAMCALL or TDCALL to do anything other than #UD.
> Whether L1 can handle such VMEXIT is another story -- it may inject a #UD to
> L2 or may not (similar to the current upstream KVM), but it is L1's
> responsibility.
>
> So I think while this patch certainly honors the correct behaviour for L2,
> it doesn't honor for L1. But I think ultimately L1 should be the one who
> is responsible for emulating hardware behaviour for L2.
>
> E.g., assuming we have a KVM selftest in L1 to test SEAMCALL/TDCALL in
> normal VMX L2. L1 should be able to catch it's own bug when such VMEXIT
> isn't handled correctly. But with this patch, L1 will never be able to
> catch this IIUC.
On Fri, 2025-10-17 at 06:00 -0700, Sean Christopherson wrote:
> On Fri, Oct 17, 2025, Kai Huang wrote:
> >
> > > --- a/arch/x86/kvm/vmx/nested.c
> > > +++ b/arch/x86/kvm/vmx/nested.c
> > > @@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
> > > case EXIT_REASON_NOTIFY:
> > > /* Notify VM exit is not exposed to L1 */
> > > return false;
> > > + case EXIT_REASON_SEAMCALL:
> > > + case EXIT_REASON_TDCALL:
> > > + /*
> > > + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't
> > > + * virtualized by KVM for L1 hypervisors, i.e. L1 should
> > > + * never want or expect such an exit.
> > > + */
> > > + return false;
> >
> > Sorry for commenting late.
> >
> > I think from emulating hardware behaviour's perspective, if L1 doesn't
> > support TDX (obviously true), SEAMCALL/TDCALL in L2 should cause VMEXIT to
> > L1. In other words, L1 is expecting a VMEXIT in such case.
>
> No, because from L1's perspective, the opcodes map to undefined instructions and
> thus should #UD in L2. There's no super explicit enumeration, but IMO it's fair
> to say that for L1 to think the instructions exists, it would need to observe
> IA32_SEAMRR_PHYS_{BASE,MASK} for SEAMCALL, and MSR_IA32_MKTME_KEYID_PARTITIONING
> as well for TDCALL. KVM doesn't emulate any of those instructions, and so L1
> should never expect SEAMCALL or TDCALL to do anything other than #UD.
>
Oh right. I forgot the SEAMCALL/TDCALL VMEXIT only happens on a TDX-capable
machine. :-)
>
On 10/17/2025 2:21 AM, Sean Christopherson wrote:
> Add VMX exit handlers for SEAMCALL and TDCALL to inject a #UD if a non-TD
> guest attempts to execute SEAMCALL or TDCALL. Neither SEAMCALL nor TDCALL
> is gated by any software enablement other than VMXON, and so will generate
> a VM-Exit instead of e.g. a native #UD when executed from the guest kernel.
>
> Note! No unprivileged DoS of the L1 kernel is possible as TDCALL and
> SEAMCALL #GP at CPL > 0, and the CPL check is performed prior to the VMX
> non-root (VM-Exit) check, i.e. userspace can't crash the VM. And for a
> nested guest, KVM forwards unknown exits to L1, i.e. an L2 kernel can
> crash itself, but not L1.
>
> Note #2! The Intel® Trust Domain CPU Architectural Extensions spec's
> pseudocode shows the CPL > 0 check for SEAMCALL coming _after_ the VM-Exit,
> but that appears to be a documentation bug (likely because the CPL > 0
> check was incorrectly bundled with other lower-priority #GP checks).
> Testing on SPR and EMR shows that the CPL > 0 check is performed before
> the VMX non-root check, i.e. SEAMCALL #GPs when executed in usermode.
>
> Note #3! The aforementioned Trust Domain spec uses confusing pseudocode
> that says that SEAMCALL will #UD if executed "inSEAM", but "inSEAM"
> specifically means in SEAM Root Mode, i.e. in the TDX-Module. The long-
> form description explicitly states that SEAMCALL generates an exit when
> executed in "SEAM VMX non-root operation". But that's a moot point as the
> TDX-Module injects #UD if the guest attempts to execute SEAMCALL, as
> documented in the "Unconditionally Blocked Instructions" section of the
> TDX-Module base specification.
>
> Cc: stable@vger.kernel.org
> Cc: Kai Huang <kai.huang@intel.com>
> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
> Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Binbin Wu <binbin.wu@linux.intel.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
>
Reviewed-by: Binbin Wu <binbin.wu@linux.intel.com>
On 10/17/2025 2:21 AM, Sean Christopherson wrote:
> Add VMX exit handlers for SEAMCALL and TDCALL to inject a #UD if a non-TD
> guest attempts to execute SEAMCALL or TDCALL. Neither SEAMCALL nor TDCALL
> is gated by any software enablement other than VMXON, and so will generate
> a VM-Exit instead of e.g. a native #UD when executed from the guest kernel.
>
> Note! No unprivileged DoS of the L1 kernel is possible as TDCALL and
> SEAMCALL #GP at CPL > 0, and the CPL check is performed prior to the VMX
> non-root (VM-Exit) check, i.e. userspace can't crash the VM. And for a
> nested guest, KVM forwards unknown exits to L1, i.e. an L2 kernel can
> crash itself, but not L1.
>
> Note #2! The Intel® Trust Domain CPU Architectural Extensions spec's
> pseudocode shows the CPL > 0 check for SEAMCALL coming _after_ the VM-Exit,
> but that appears to be a documentation bug (likely because the CPL > 0
> check was incorrectly bundled with other lower-priority #GP checks).
> Testing on SPR and EMR shows that the CPL > 0 check is performed before
> the VMX non-root check, i.e. SEAMCALL #GPs when executed in usermode.
>
> Note #3! The aforementioned Trust Domain spec uses confusing pseudocode
> that says that SEAMCALL will #UD if executed "inSEAM", but "inSEAM"
> specifically means in SEAM Root Mode, i.e. in the TDX-Module. The long-
> form description explicitly states that SEAMCALL generates an exit when
> executed in "SEAM VMX non-root operation". But that's a moot point as the
> TDX-Module injects #UD if the guest attempts to execute SEAMCALL, as
> documented in the "Unconditionally Blocked Instructions" section of the
> TDX-Module base specification.
>
> Cc: stable@vger.kernel.org
> Cc: Kai Huang <kai.huang@intel.com>
> Cc: Xiaoyao Li <xiaoyao.li@intel.com>
> Cc: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Cc: Dan Williams <dan.j.williams@intel.com>
> Cc: Binbin Wu <binbin.wu@linux.intel.com>
> Signed-off-by: Sean Christopherson <seanjc@google.com>
Reviewed-by: Xiaoyao Li <xiaoyao.li@intel.com>
> ---
> arch/x86/include/uapi/asm/vmx.h | 1 +
> arch/x86/kvm/vmx/nested.c | 8 ++++++++
> arch/x86/kvm/vmx/vmx.c | 8 ++++++++
> 3 files changed, 17 insertions(+)
>
> diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
> index 9792e329343e..1baa86dfe029 100644
> --- a/arch/x86/include/uapi/asm/vmx.h
> +++ b/arch/x86/include/uapi/asm/vmx.h
> @@ -93,6 +93,7 @@
> #define EXIT_REASON_TPAUSE 68
> #define EXIT_REASON_BUS_LOCK 74
> #define EXIT_REASON_NOTIFY 75
> +#define EXIT_REASON_SEAMCALL 76
> #define EXIT_REASON_TDCALL 77
> #define EXIT_REASON_MSR_READ_IMM 84
> #define EXIT_REASON_MSR_WRITE_IMM 85
> diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
> index 76271962cb70..bcea087b642f 100644
> --- a/arch/x86/kvm/vmx/nested.c
> +++ b/arch/x86/kvm/vmx/nested.c
> @@ -6728,6 +6728,14 @@ static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
> case EXIT_REASON_NOTIFY:
> /* Notify VM exit is not exposed to L1 */
> return false;
> + case EXIT_REASON_SEAMCALL:
> + case EXIT_REASON_TDCALL:
> + /*
> + * SEAMCALL and TDCALL unconditionally VM-Exit, but aren't
> + * virtualized by KVM for L1 hypervisors, i.e. L1 should
> + * never want or expect such an exit.
> + */
> + return false;
> default:
> return true;
> }
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index 546272a5d34d..d1b34b7ca4a3 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -6033,6 +6033,12 @@ static int handle_vmx_instruction(struct kvm_vcpu *vcpu)
> return 1;
> }
>
> +static int handle_tdx_instruction(struct kvm_vcpu *vcpu)
> +{
> + kvm_queue_exception(vcpu, UD_VECTOR);
> + return 1;
> +}
> +
> #ifndef CONFIG_X86_SGX_KVM
> static int handle_encls(struct kvm_vcpu *vcpu)
> {
> @@ -6158,6 +6164,8 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
> [EXIT_REASON_ENCLS] = handle_encls,
> [EXIT_REASON_BUS_LOCK] = handle_bus_lock_vmexit,
> [EXIT_REASON_NOTIFY] = handle_notify,
> + [EXIT_REASON_SEAMCALL] = handle_tdx_instruction,
> + [EXIT_REASON_TDCALL] = handle_tdx_instruction,
> [EXIT_REASON_MSR_READ_IMM] = handle_rdmsr_imm,
> [EXIT_REASON_MSR_WRITE_IMM] = handle_wrmsr_imm,
> };
© 2016 - 2026 Red Hat, Inc.