[v2] Fuller TDX kexec support

[PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Vishal Verma 1 week, 3 days ago

Some early TDX-capable platforms have an erratum where a partial write
to TDX private memory can cause a machine check on a subsequent read.
On these platforms, kexec and kdump have been disabled in these cases,
because the old kernel cannot safely hand off TDX state to the new
kernel. Later TDX modules support the TDH.SYS.DISABLE SEAMCALL, which
provides a way to cleanly disable TDX and allow kexec to proceed.

The new SEAMCALL has an enumeration bit, but that is ignored. It is
expected that users will be using the latest TDX module, and the failure
mode for running the missing SEAMCALL on an older module is not fatal.

This can be a long running operation, and the time needed largely
depends on the amount of memory that has been allocated to TDs. If all
TDs have been destroyed prior to the sys_disable call, then it is fast,
since only the TDX module memory needs to be overwritten.

After the SEAMCALL completes, the TDX module is disabled and all memory
resources allocated to TDX are freed and reset. The next kernel can then
re-initialize the TDX module from scratch via the normal TDX bring-up
sequence.

The SEAMCALL can return two different error codes that expect a retry.
 - TDX_INTERRUPTED_RESUMABLE can be returned in the case of a host
   interrupt. However, it will not return until it makes some forward
   progress, so we can expect to complete even in the case of interrupt
   storms.
 - TDX_SYS_BUSY will be returned on contention with other TDH.SYS.*
   SEAMCALLs, however a side effect of TDH.SYS.DISABLE is that it will
   block other SEAMCALLs once it gets going. So this contention will be
   short lived.

So loop infinitely on either of these error codes, until success or other
error.

Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
---
 arch/x86/include/asm/shared/tdx_errno.h |  1 +
 arch/x86/include/asm/tdx.h              |  3 +++
 arch/x86/virt/vmx/tdx/tdx.h             |  1 +
 arch/x86/virt/vmx/tdx/tdx.c             | 28 ++++++++++++++++++++++++++++
 4 files changed, 33 insertions(+)

diff --git a/arch/x86/include/asm/shared/tdx_errno.h b/arch/x86/include/asm/shared/tdx_errno.h
index 8bf6765cf082..246b4fd54a48 100644
--- a/arch/x86/include/asm/shared/tdx_errno.h
+++ b/arch/x86/include/asm/shared/tdx_errno.h
@@ -15,6 +15,7 @@
 #define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE	0x6000000500000000ULL
 #define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE	0x6000000700000000ULL
 #define TDX_INTERRUPTED_RESUMABLE		0x8000000300000000ULL
+#define TDX_SYS_BUSY				0x8000020200000000ULL
 #define TDX_OPERAND_INVALID			0xC000010000000000ULL
 #define TDX_OPERAND_BUSY			0x8000020000000000ULL
 #define TDX_PREVIOUS_TLB_EPOCH_BUSY		0x8000020100000000ULL
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 7674fc530090..a0a4a15142fc 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -172,6 +172,8 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level)
         return level - 1;
 }
 
+void tdx_sys_disable(void);
+
 u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
 u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
 u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
@@ -203,6 +205,7 @@ static inline void tdx_init(void) { }
 static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
 static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
 static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
+static inline void tdx_sys_disable(void) { }
 #endif	/* CONFIG_INTEL_TDX_HOST */
 
 #endif /* !__ASSEMBLER__ */
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index dde219c823b4..e2cf2dd48755 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -46,6 +46,7 @@
 #define TDH_PHYMEM_PAGE_WBINVD		41
 #define TDH_VP_WR			43
 #define TDH_SYS_CONFIG			45
+#define TDH_SYS_DISABLE			69
 
 /*
  * SEAMCALL leaf:
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 0802d0fd18a4..3a76000dec7a 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -37,6 +37,7 @@
 #include <asm/msr.h>
 #include <asm/cpufeature.h>
 #include <asm/tdx.h>
+#include <asm/shared/tdx_errno.h>
 #include <asm/cpu_device_id.h>
 #include <asm/processor.h>
 #include <asm/mce.h>
@@ -1940,3 +1941,30 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
 	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
 }
 EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid);
+
+void tdx_sys_disable(void)
+{
+	struct tdx_module_args args = {};
+	u64 ret;
+
+	/*
+	 * This loop is not expected to spin forever:
+	 *  - TDX_INTERRUPTED_RESUMABLE guarantees forward progress between
+	 *    calls.
+	 *  - TDX_SYS_BUSY could transiently contend with TDH.SYS.* SEAMCALLs,
+	 *    but will lock out future ones.
+	 *
+	 * This is a 'destructive' SEAMCALL, in that no other SEAMCALL can be
+	 * run after this until a full reinitialization is done.
+	 */
+	do {
+		ret = seamcall(TDH_SYS_DISABLE, &args);
+	} while (ret == TDX_INTERRUPTED_RESUMABLE || ret == TDX_SYS_BUSY);
+
+	/*
+	 * Print SEAMCALL failures, but not SW-defined error codes
+	 * (SEAMCALL faulted with #GP/#UD, TDX not supported).
+	 */
+	if (ret && (ret & TDX_SW_ERROR) != TDX_SW_ERROR)
+		pr_err("TDH.SYS.DISABLE failed: 0x%016llx\n", ret);
+}

-- 
2.53.0

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Kiryl Shutsemau 4 days, 5 hours ago

On Mon, Mar 23, 2026 at 02:59:06PM -0600, Vishal Verma wrote:
> Some early TDX-capable platforms have an erratum where a partial write
> to TDX private memory can cause a machine check on a subsequent read.
> On these platforms, kexec and kdump have been disabled in these cases,
> because the old kernel cannot safely hand off TDX state to the new
> kernel. Later TDX modules support the TDH.SYS.DISABLE SEAMCALL, which
> provides a way to cleanly disable TDX and allow kexec to proceed.
> 
> The new SEAMCALL has an enumeration bit, but that is ignored. It is
> expected that users will be using the latest TDX module, and the failure
> mode for running the missing SEAMCALL on an older module is not fatal.
> 
> This can be a long running operation, and the time needed largely
> depends on the amount of memory that has been allocated to TDs. If all
> TDs have been destroyed prior to the sys_disable call, then it is fast,
> with only needing to override the TDX module memory.
> 
> After the SEAMCALL completes, the TDX module is disabled and all memory
> resources allocated to TDX are freed and reset. The next kernel can then
> re-initialize the TDX module from scratch via the normal TDX bring-up
> sequence.
> 
> The SEAMCALL can return two different error codes that expect a retry.
>  - TDX_INTERRUPTED_RESUMABLE can be returned in the case of a host
>    interrupt. However, it will not return until it makes some forward
>    progress, so we can expect to complete even in the case of interrupt
>    storms.
>  - TDX_SYS_BUSY will be returned on contention with other TDH.SYS.*
>    SEAMCALLs, however a side effect of TDH.SYS.DISABLE is that it will
>    block other SEAMCALLs once it gets going. So this contention will be
>    short lived.
> 
> So loop infinitely on either of these error codes, until success or other
> error.
> 
> Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> ---
>  arch/x86/include/asm/shared/tdx_errno.h |  1 +
>  arch/x86/include/asm/tdx.h              |  3 +++
>  arch/x86/virt/vmx/tdx/tdx.h             |  1 +
>  arch/x86/virt/vmx/tdx/tdx.c             | 28 ++++++++++++++++++++++++++++
>  4 files changed, 33 insertions(+)
> 
> diff --git a/arch/x86/include/asm/shared/tdx_errno.h b/arch/x86/include/asm/shared/tdx_errno.h
> index 8bf6765cf082..246b4fd54a48 100644
> --- a/arch/x86/include/asm/shared/tdx_errno.h
> +++ b/arch/x86/include/asm/shared/tdx_errno.h
> @@ -15,6 +15,7 @@
>  #define TDX_NON_RECOVERABLE_TD_NON_ACCESSIBLE	0x6000000500000000ULL
>  #define TDX_NON_RECOVERABLE_TD_WRONG_APIC_MODE	0x6000000700000000ULL
>  #define TDX_INTERRUPTED_RESUMABLE		0x8000000300000000ULL
> +#define TDX_SYS_BUSY				0x8000020200000000ULL
>  #define TDX_OPERAND_INVALID			0xC000010000000000ULL
>  #define TDX_OPERAND_BUSY			0x8000020000000000ULL
>  #define TDX_PREVIOUS_TLB_EPOCH_BUSY		0x8000020100000000ULL
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 7674fc530090..a0a4a15142fc 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -172,6 +172,8 @@ static inline int pg_level_to_tdx_sept_level(enum pg_level level)
>          return level - 1;
>  }
>  
> +void tdx_sys_disable(void);
> +
>  u64 tdh_vp_enter(struct tdx_vp *vp, struct tdx_module_args *args);
>  u64 tdh_mng_addcx(struct tdx_td *td, struct page *tdcs_page);
>  u64 tdh_mem_page_add(struct tdx_td *td, u64 gpa, struct page *page, struct page *source, u64 *ext_err1, u64 *ext_err2);
> @@ -203,6 +205,7 @@ static inline void tdx_init(void) { }
>  static inline u32 tdx_get_nr_guest_keyids(void) { return 0; }
>  static inline const char *tdx_dump_mce_info(struct mce *m) { return NULL; }
>  static inline const struct tdx_sys_info *tdx_get_sysinfo(void) { return NULL; }
> +static inline void tdx_sys_disable(void) { }
>  #endif	/* CONFIG_INTEL_TDX_HOST */
>  
>  #endif /* !__ASSEMBLER__ */
> diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
> index dde219c823b4..e2cf2dd48755 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.h
> +++ b/arch/x86/virt/vmx/tdx/tdx.h
> @@ -46,6 +46,7 @@
>  #define TDH_PHYMEM_PAGE_WBINVD		41
>  #define TDH_VP_WR			43
>  #define TDH_SYS_CONFIG			45
> +#define TDH_SYS_DISABLE			69
>  
>  /*
>   * SEAMCALL leaf:
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index 0802d0fd18a4..3a76000dec7a 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -37,6 +37,7 @@
>  #include <asm/msr.h>
>  #include <asm/cpufeature.h>
>  #include <asm/tdx.h>
> +#include <asm/shared/tdx_errno.h>
>  #include <asm/cpu_device_id.h>
>  #include <asm/processor.h>
>  #include <asm/mce.h>
> @@ -1940,3 +1941,30 @@ u64 tdh_phymem_page_wbinvd_hkid(u64 hkid, struct page *page)
>  	return seamcall(TDH_PHYMEM_PAGE_WBINVD, &args);
>  }
>  EXPORT_SYMBOL_FOR_KVM(tdh_phymem_page_wbinvd_hkid);
> +
> +void tdx_sys_disable(void)
> +{
> +	struct tdx_module_args args = {};
> +	u64 ret;
> +
> +	/*
> +	 * Don't loop forever.

Nit: Add a new line here.

> +	 *  - TDX_INTERRUPTED_RESUMABLE guarantees forward progress between
> +	 *    calls.

And here.

> +	 *  - TDX_SYS_BUSY could transiently contend with TDH.SYS.* SEAMCALLs,
> +	 *    but will lock out future ones.

Locked out by who? Is it TDX module contract? I don't see it documented in
the spec.

I assumed that if the SEAMCALL fails, other SEAMCALLs are supposed to
remain functional. Hm?

> +	 *
> +	 * This is a 'destructive' SEAMCALL, in that no other SEAMCALL can be
> +	 * run after this until a full reinitialization is done.
> +	 */
> +	do {
> +		ret = seamcall(TDH_SYS_DISABLE, &args);
> +	} while (ret == TDX_INTERRUPTED_RESUMABLE || ret == TDX_SYS_BUSY);
> +
> +	/*
> +	 * Print SEAMCALL failures, but not SW-defined error codes
> +	 * (SEAMCALL faulted with #GP/#UD, TDX not supported).
> +	 */
> +	if (ret && (ret & TDX_SW_ERROR) != TDX_SW_ERROR)
> +		pr_err("TDH.SYS.DISABLE failed: 0x%016llx\n", ret);
> +}
> 
> -- 
> 2.53.0
> 

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Edgecombe, Rick P 3 days, 22 hours ago

On Mon, 2026-03-30 at 11:58 +0000, Kiryl Shutsemau wrote:
> > +	 *  - TDX_SYS_BUSY could transiently contend with
> > TDH.SYS.* SEAMCALLs,
> > +	 *    but will lock out future ones.
> 
> Locked out by who? Is it TDX module contract? I don't see it
> documented in
> the spec.

Yea, by the TDX module.

We relayed that we need this specific behavior around TDX_SYS_BUSY
contention, but the implementation isn't done. That spec is actually
still in draft form. Which is refreshing, because we can actually tweak
things like this based on what the kernel needs.

> 
> I assumed that if the SEAMCALL fails other SEAMCALLs suppose to be
> functional. Hm?

The behavior should be that once you make this seamcall (assuming it's
supported) that no other seamcalls can be made. They will return an
error. Do you think something else would be better? If it's an old TDX
module, nothing happens of course.

So let's change the module if we see a problem. What should it be?

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Kiryl Shutsemau 3 days, 5 hours ago

On Mon, Mar 30, 2026 at 07:25:22PM +0000, Edgecombe, Rick P wrote:
> > I assumed that if the SEAMCALL fails other SEAMCALLs suppose to be
> > functional. Hm?
> 
> The behavior should be that once you make this seamcall (assuming it's
> supported) that no other seamcalls can be made. They will return an
> error. Do you think something else would be better? If it's an old TDX
> module, nothing happens of course.

I guess the actual behaviour is dependent on the return code. It is
obviously going to be the case for TDX_SUCCESS. And from the discussion,
I guess that's true for TDX_SYS_BUSY and TDX_INTERRUPTED_RESUMABLE.

What about other cases? The spec draft also lists TDX_SYS_NOT_READY and
TDX_SYS_SHUTDOWN.

I wonder if it can affect the kernel. Consider the case when kexec
(crash kernel start) happens due to crash on TDX module.

Will we be able to shutdown TDX module cleanly and make kexec safe?

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Verma, Vishal L 2 days, 23 hours ago

On Tue, 2026-03-31 at 13:18 +0100, Kiryl Shutsemau wrote:
> On Mon, Mar 30, 2026 at 07:25:22PM +0000, Edgecombe, Rick P wrote:
> > > I assumed that if the SEAMCALL fails other SEAMCALLs suppose to be
> > > functional. Hm?
> > 
> > The behavior should be that once you make this seamcall (assuming it's
> > supported) that no other seamcalls can be made. They will return an
> > error. Do you think something else would be better? If it's an old TDX
> > module, nothing happens of course.
> 
> I guess the actual behaviour is dependant on the return code. It is
> obviously going to be the case for TDX_SUCCESS. And from the discussion,
> I guess that's true for TDX_SYS_BUSY and TDX_INTERRUPTED_RESUMABLE.
> 
> What about other cases? The spec draft also lists TDX_SYS_NOT_READY and
> TDX_SYS_SHUTDOWN.

I think these are safe too - TDX_SYS_SHUTDOWN means the module has
already been shutdown, which this seamcall would've done, so things
should be in the same state either way.

TDX_SYS_NOT_READY means the module hasn't been initialized yet. This
seamcall should just exit, and the module is already blocking any
seamcall that need the module to be initialized. The seamcalls to
initialize the module will be allowed, as they are after a sys_disable
call anyway.

> 
> I wounder if it can affect the kernel. Consider the case when kexec
> (crash kernel start) happens due to crash on TDX module.
> 
> Will we be able to shutdown TDX module cleanly and make kexec safe?

Hm - are the semantics for what happens if there is a crash in the
module defined? I think Linux should expect that sys_disable should
either start doing its shutdown work, or exit with one of the other
defined exit statuses. Anything else would be considered a module bug.

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Edgecombe, Rick P 2 days, 20 hours ago

On Tue, 2026-03-31 at 18:22 +0000, Verma, Vishal L wrote:
> > 
> > I guess the actual behaviour is dependant on the return code. It is
> > obviously going to be the case for TDX_SUCCESS. And from the discussion,
> > I guess that's true for TDX_SYS_BUSY and TDX_INTERRUPTED_RESUMABLE.
> > 
> > What about other cases? The spec draft also lists TDX_SYS_NOT_READY and
> > TDX_SYS_SHUTDOWN.
> 
> I think these are safe too - TDX_SYS_SHUTDOWN means the module has
> already been shutdown, which this seamcall would've done, so things
> should be in the same state either way.
> 
> TDX_SYS_NOT_READY means the module hasn't been initialized yet. This
> seamcall should just exit, and the module is already blocking any
> seamcall that need the module to be initialized. The seamcalls to
> initialize the module will be allowed, as they are after a sys_disable
> call anyway.

Should the seamcall return success in the case where it would return
TDX_SYS_NOT_READY? It is in basically a reset state right? The errors we care
about are actual errors (TDX_SW_ERROR), so it makes no difference to the code in
the patch. But it might be a nicer API for the seamcall?

> 
> > 
> > I wounder if it can affect the kernel. Consider the case when kexec
> > (crash kernel start) happens due to crash on TDX module.
> > 
> > Will we be able to shutdown TDX module cleanly and make kexec safe?
> 
> Hm  -are the semantics for what happens if there is a crash in the
> module defined? I think Linux should expect that sys_disable should
> either start doing its shutdown work, or exit with one of the other
> defined exit statuses. Anything else would be considered a module bug.

We often have the question come up about how much we should guard against
bugs in the TDX module. I tend to also think we should not do defensive
programming, same as we do for the kernel. If it's easy to handle something or
emit a warning it's nice, but otherwise the solution for such cases should be to
fix the TDX module bug.

But for the kdump case, we don't actually need sys disable to succeed. The kdump
kernel will not load the TDX module. And as for the errata, this already needs a
special situation to be a problem. But even if it happens, I'd think it better to
try the kdump. Not sure what the fix would be for that scenario, even if we
allowed for a large complexity budget. So best effort seems good.

Does it seem reasonable?

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Dave Hansen 2 days, 3 hours ago

On 3/31/26 14:36, Edgecombe, Rick P wrote:
> On Tue, 2026-03-31 at 18:22 +0000, Verma, Vishal L wrote:
>>> I guess the actual behaviour is dependant on the return code. It is
>>> obviously going to be the case for TDX_SUCCESS. And from the discussion,
>>> I guess that's true for TDX_SYS_BUSY and TDX_INTERRUPTED_RESUMABLE.
>>>
>>> What about other cases? The spec draft also lists TDX_SYS_NOT_READY and
>>> TDX_SYS_SHUTDOWN.
>> I think these are safe too - TDX_SYS_SHUTDOWN means the module has
>> already been shutdown, which this seamcall would've done, so things
>> should be in the same state either way.
>>
>> TDX_SYS_NOT_READY means the module hasn't been initialized yet. This
>> seamcall should just exit, and the module is already blocking any
>> seamcall that need the module to be initialized. The seamcalls to
>> initialize the module will be allowed, as they are after a sys_disable
>> call anyway.
> Should the seamcall return success in the case where it would return
> TDX_SYS_NOT_READY? It is in basically a reset state right? The errors we care
> about are actual errors (TDX_SW_ERROR), so it makes no difference to the code in
> the patch. But it might be a nicer API for the seamcall?

The problem is that the module doesn't have *a* reset state.
TDX_SYS_NOT_READY gets returned before the module is initialized and
initialization is a long, arduous process.

For instance, I believe the module stays "not ready" in the middle of
giving it PAMT memory and a keyID and all that jazz.

TDX_SYS_NOT_READY is a way of saying it can't easily *make* it to the
actual reset state that TDH.SYS.DISABLE wants it to be in.

It's arguable that the module should be made more resilient to stop
returning TDX_SYS_NOT_READY. But it's not as simple as just changing a
return code in the module.

I'm OK with it continuing to return TDX_SYS_NOT_READY for now. I think
it's a useful indicator. Maybe the kernel can't do much with it, but
it's a little window into what went wrong.

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Kiryl Shutsemau 2 days, 8 hours ago

On Tue, Mar 31, 2026 at 09:36:03PM +0000, Edgecombe, Rick P wrote:
> On Tue, 2026-03-31 at 18:22 +0000, Verma, Vishal L wrote:
> > > 
> > > I guess the actual behaviour is dependant on the return code. It is
> > > obviously going to be the case for TDX_SUCCESS. And from the discussion,
> > > I guess that's true for TDX_SYS_BUSY and TDX_INTERRUPTED_RESUMABLE.
> > > 
> > > What about other cases? The spec draft also lists TDX_SYS_NOT_READY and
> > > TDX_SYS_SHUTDOWN.
> > 
> > I think these are safe too - TDX_SYS_SHUTDOWN means the module has
> > already been shutdown, which this seamcall would've done, so things
> > should be in the same state either way.
> > 
> > TDX_SYS_NOT_READY means the module hasn't been initialized yet. This
> > seamcall should just exit, and the module is already blocking any
> > seamcall that need the module to be initialized. The seamcalls to
> > initialize the module will be allowed, as they are after a sys_disable
> > call anyway.
> 
> Should the seamcall return success in the case where it would return
> TDX_SYS_NOT_READY? It is in basically a reset state right? The errors we care
> about are actual errors (TDX_SW_ERROR), so it makes no difference to the code in
> the patch. But it might be a nicer API for the seamcall?

I am not sure. TDX_SYS_NOT_READY can be useful, as it might indicate a
mismatch in system state understanding between the kernel and the TDX module.

> > > I wounder if it can affect the kernel. Consider the case when kexec
> > > (crash kernel start) happens due to crash on TDX module.
> > > 
> > > Will we be able to shutdown TDX module cleanly and make kexec safe?
> > 
> > Hm  -are the semantics for what happens if there is a crash in the
> > module defined?

I meant kernel crash around/before TDX module initialization. Sorry for
confusion.

> > I think Linux should expect that sys_disable should
> > either start doing its shutdown work, or exit with one of the other
> > defined exit statuses. Anything else would be considered a module bug.
> 
> We often have the question come up about how much we should to guard against
> bugs in the TDX module. I tend to also think we should not do defensive
> programming, same as we do for the kernel. If it's easy to handle something or
> emit a warning it's nice, but otherwise the solution for such cases should be to
> fix the TDX module bug.
> 
> But for the kdump case, we don't actually need sys disable to succeed. The kdump
> kernel will not load the TDX module.

AFAIK, it is possible to start a normal kernel after kdump is done with
kexec (requires memmap= tricks). And the normal kernel might want to use
TDX again.

Not sure if it is done in practice. I would rather go full reboot path
after crash.

> And as for the errata, this already needs a
> special situation to be a problem. But even if it happens, I'd think better to
> try to the kdump. Not sure what the fix would be for that scenario, even if we
> allowed for a large complexity budget. So best effort seems good.
> 
> Does it seem reasonable?

I am probably too picky here. We want to start by making basic kexec
functionality work.

Reviewed-by: Kiryl Shutsemau (Meta) <kas@kernel.org>

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Chao Gao 1 week, 3 days ago

On Mon, Mar 23, 2026 at 02:59:06PM -0600, Vishal Verma wrote:
>Some early TDX-capable platforms have an erratum where a partial write
>to TDX private memory can cause a machine check on a subsequent read.
>On these platforms, kexec and kdump have been disabled in these cases,
>because the old kernel cannot safely hand off TDX state to the new
>kernel. Later TDX modules support the TDH.SYS.DISABLE SEAMCALL, which
>provides a way to cleanly disable TDX and allow kexec to proceed.
>
>The new SEAMCALL has an enumeration bit, but that is ignored. It is
>expected that users will be using the latest TDX module, and the failure
>mode for running the missing SEAMCALL on an older module is not fatal.
>
>This can be a long running operation, and the time needed largely
>depends on the amount of memory that has been allocated to TDs. If all
>TDs have been destroyed prior to the sys_disable call, then it is fast,
>with only needing to override the TDX module memory.
>
>After the SEAMCALL completes, the TDX module is disabled and all memory
>resources allocated to TDX are freed and reset. The next kernel can then
>re-initialize the TDX module from scratch via the normal TDX bring-up
>sequence.
>
>The SEAMCALL can return two different error codes that expect a retry.
> - TDX_INTERRUPTED_RESUMABLE can be returned in the case of a host
>   interrupt. However, it will not return until it makes some forward
>   progress, so we can expect to complete even in the case of interrupt
>   storms.
> - TDX_SYS_BUSY will be returned on contention with other TDH.SYS.*
>   SEAMCALLs, however a side effect of TDH.SYS.DISABLE is that it will
>   block other SEAMCALLs once it gets going. So this contention will be
>   short lived.
>
>So loop infinitely on either of these error codes, until success or other
>error.
>
>Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
>Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>

Reviewed-by: Chao Gao <chao.gao@intel.com>

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Huang, Kai 1 week, 3 days ago

On Mon, 2026-03-23 at 14:59 -0600, Vishal Verma wrote:
> Some early TDX-capable platforms have an erratum where a partial write
> to TDX private memory can cause a machine check on a subsequent read.
> On these platforms, kexec and kdump have been disabled in these cases,
> because the old kernel cannot safely hand off TDX state to the new
> kernel. Later TDX modules support the TDH.SYS.DISABLE SEAMCALL, which
> provides a way to cleanly disable TDX and allow kexec to proceed.
> 
> The new SEAMCALL has an enumeration bit, but that is ignored. It is
> expected that users will be using the latest TDX module, and the failure
> mode for running the missing SEAMCALL on an older module is not fatal.
> 
> This can be a long running operation, and the time needed largely
> depends on the amount of memory that has been allocated to TDs. If all
> TDs have been destroyed prior to the sys_disable call, then it is fast,
> with only needing to override the TDX module memory.
> 
> After the SEAMCALL completes, the TDX module is disabled and all memory
> resources allocated to TDX are freed and reset. The next kernel can then
> re-initialize the TDX module from scratch via the normal TDX bring-up
> sequence.
> 
> The SEAMCALL can return two different error codes that expect a retry.
>  - TDX_INTERRUPTED_RESUMABLE can be returned in the case of a host
>    interrupt. However, it will not return until it makes some forward
>    progress, so we can expect to complete even in the case of interrupt
>    storms.
>  - TDX_SYS_BUSY will be returned on contention with other TDH.SYS.*
>    SEAMCALLs, however a side effect of TDH.SYS.DISABLE is that it will
>    block other SEAMCALLs once it gets going. So this contention will be
>    short lived.
> 
> So loop infinitely on either of these error codes, until success or other
> error.
> 
> Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
> Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
> 

Acked-by: Kai Huang <kai.huang@intel.com>

Re: [PATCH v2 3/5] x86/virt/tdx: Add SEAMCALL wrapper for TDH.SYS.DISABLE

Posted by Verma, Vishal L 1 week, 3 days ago

On Mon, 2026-03-23 at 14:59 -0600, Vishal Verma wrote:
> 
[..]
> +void tdx_sys_disable(void)
> +{
> +	struct tdx_module_args args = {};
> +	u64 ret;
> +
> +	/*
> +	 * Don't loop forever.
> +	 *  - TDX_INTERRUPTED_RESUMABLE guarantees forward progress between
> +	 *    calls.
> +	 *  - TDX_SYS_BUSY could transiently contend with TDH.SYS.* SEAMCALLs,
> +	 *    but will lock out future ones.
> +	 *
> +	 * This is a 'destructive' SEAMCALL, in that no other SEAMCALL can be
> +	 * run after this until a full reinitialization is done.
> +	 */
> +	do {
> +		ret = seamcall(TDH_SYS_DISABLE, &args);
> +	} while (ret == TDX_INTERRUPTED_RESUMABLE || ret == TDX_SYS_BUSY);
> +
> +	/*
> +	 * Print SEAMCALL failures, but not SW-defined error codes
> +	 * (SEAMCALL faulted with #GP/#UD, TDX not supported).
> +	 */
> +	if (ret && (ret & TDX_SW_ERROR) != TDX_SW_ERROR)
> +		pr_err("TDH.SYS.DISABLE failed: 0x%016llx\n", ret);
> +}

Note - old TDX modules that don't implement this SEAMCALL produce a
message like:

  virt/tdx: TDH.SYS.DISABLE failed: 0xc000010000000000

Where that code translates to TDX_OPERAND_INVALID.
This also serves as a nudge that the module should be updated.

It might be worth including a blurb about this in the commit message -
something like below. This could be included when applying, or I can
send an updated version with this if it is acceptable.

---

An error is printed if the SEAMCALL fails with anything other than the
error codes that cause retries, or 'synthesized' error codes produced
for #GP or #UD. e.g., an old module that has been properly initialized,
that doesn't implement SYS_DISABLE, returns TDX_OPERAND_INVALID. This
prints:

  virt/tdx: TDH.SYS.DISABLE failed: 0xc000010000000000

But a system that doesn't have any TDX support at all doesn't print
anything.