TD-Preserving updates depend on a userspace tool to select the appropriate
module to load. To facilitate this decision-making process, expose the
necessary information to userspace.
SEAMLDR version information can be used for compatibility check and
num_remaining_updates indicates how many updates can still be performed.
SEAMLDR serves as the foundation of TDX, as it is responsible for loading
the TDX module and, in other words, enabling the entire TDX system.
Therefore, attach its attributes to the root device of the new TDX virtual
subsystem.
Signed-off-by: Chao Gao <chao.gao@intel.com>
Tested-by: Farrah Chen <farrah.chen@intel.com>
---
Documentation/ABI/testing/sysfs-devices-tdx | 24 ++++++++++++++
arch/x86/virt/vmx/tdx/seamldr.c | 35 ++++++++++++++++++++-
arch/x86/virt/vmx/tdx/seamldr.h | 14 +++++++++
arch/x86/virt/vmx/tdx/tdx.c | 14 ++++++++-
4 files changed, 85 insertions(+), 2 deletions(-)
create mode 100644 arch/x86/virt/vmx/tdx/seamldr.h
diff --git a/Documentation/ABI/testing/sysfs-devices-tdx b/Documentation/ABI/testing/sysfs-devices-tdx
index ccbe6431241e..112f0738253b 100644
--- a/Documentation/ABI/testing/sysfs-devices-tdx
+++ b/Documentation/ABI/testing/sysfs-devices-tdx
@@ -6,3 +6,27 @@ Description: (RO) Report the version of the loaded TDX module. The TDX module
version is formatted as x.y.z, where "x" is the major version,
"y" is the minor version and "z" is the update version. Versions
are used for bug reporting, TD-Preserving updates and etc.
+
+What: /sys/devices/virtual/tdx/seamldr/version
+Date: March 2025
+KernelVersion: v6.15
+Contact: linux-coco@lists.linux.dev
+Description: (RO) Reports the version of the loaded SEAM loader. The SEAM
+ loader version is formatted as x.y.z, where "x" is the major
+ version, "y" is the minor version and "z" is the update version.
+ Versions are used for bug reporting and compatibility check.
+
+What: /sys/devices/virtual/tdx/seamldr/num_remaining_updates
+Date: March 2025
+KernelVersion: v6.15
+Contact: linux-coco@lists.linux.dev
+Description: (RO) Reports the number of remaining updates that can be
+ performed via TD-Preserving updates. It is always zero if
+ SEAMLDR doesn't TD-Preserving updates. Otherwise, it is an
+ arch-specific value after bootup. This value decreases by one
+ after each successful TD-Preserving update. Once it reaches
+ zero, further TD-Preserving updates will fail until next reboot.
+
+ See Intel® Trust Domain Extensions - SEAM Loader (SEAMLDR)
+ Interface Specification Chapter 3.3 "SEAMLDR_INFO" and Chapter
+ 4.2 "SEAMLDR.INSTALL" for more information.
diff --git a/arch/x86/virt/vmx/tdx/seamldr.c b/arch/x86/virt/vmx/tdx/seamldr.c
index c2771323729c..b628555daf55 100644
--- a/arch/x86/virt/vmx/tdx/seamldr.c
+++ b/arch/x86/virt/vmx/tdx/seamldr.c
@@ -7,9 +7,13 @@
#define pr_fmt(fmt) "seamldr: " fmt
#include <linux/cleanup.h>
+#include <linux/device.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
#include "tdx.h"
#include "../vmx.h"
+#include "seamldr.h"
/* P-SEAMLDR SEAMCALL leaf function */
#define P_SEAMLDR_INFO 0x8000000000000000
@@ -62,7 +66,36 @@ static inline int seamldr_call(u64 fn, struct tdx_module_args *args)
return ret;
}
-static __maybe_unused int get_seamldr_info(void)
+static ssize_t version_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u.%u.%u\n", seamldr_info.major_version,
+ seamldr_info.minor_version,
+ seamldr_info.update_version);
+}
+
+static ssize_t num_remaining_updates_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ return sysfs_emit(buf, "%u\n", seamldr_info.num_remaining_updates);
+}
+
+static DEVICE_ATTR_RO(version);
+static DEVICE_ATTR_RO(num_remaining_updates);
+
+static struct attribute *seamldr_attrs[] = {
+ &dev_attr_version.attr,
+ &dev_attr_num_remaining_updates.attr,
+ NULL,
+};
+
+struct attribute_group seamldr_group = {
+ .name = "seamldr",
+ .attrs = seamldr_attrs,
+};
+
+int get_seamldr_info(void)
{
struct tdx_module_args args = { .rcx = __pa(&seamldr_info) };
diff --git a/arch/x86/virt/vmx/tdx/seamldr.h b/arch/x86/virt/vmx/tdx/seamldr.h
new file mode 100644
index 000000000000..15597cb5036d
--- /dev/null
+++ b/arch/x86/virt/vmx/tdx/seamldr.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _X86_VIRT_VMX_TDX_SEAMLDR_H
+#define _X86_VIRT_VMX_TDX_SEAMLDR_H
+
+#ifdef CONFIG_INTEL_TDX_MODULE_UPDATE
+extern struct attribute_group seamldr_group;
+#define SEAMLDR_GROUP (&seamldr_group)
+int get_seamldr_info(void);
+#else
+#define SEAMLDR_GROUP NULL
+static inline int get_seamldr_info(void) { return 0; }
+#endif
+
+#endif
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index 5f1f463ddfe1..aa6a23d46494 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -41,6 +41,7 @@
#include <asm/processor.h>
#include <asm/mce.h>
#include "tdx.h"
+#include "seamldr.h"
static u32 tdx_global_keyid __ro_after_init;
static u32 tdx_guest_keyid_start __ro_after_init;
@@ -1147,13 +1148,24 @@ static struct tdx_tsm *init_tdx_tsm(void)
return no_free_ptr(tsm);
}
+static const struct attribute_group *tdx_subsys_groups[] = {
+ SEAMLDR_GROUP,
+ NULL,
+};
+
static void tdx_subsys_init(void)
{
struct tdx_tsm *tdx_tsm;
int err;
+ err = get_seamldr_info();
+ if (err) {
+ pr_err("failed to get seamldr info %d\n", err);
+ return;
+ }
+
/* Establish subsystem for global TDX module attributes */
- err = subsys_virtual_register(&tdx_subsys, NULL);
+ err = subsys_virtual_register(&tdx_subsys, tdx_subsys_groups);
if (err) {
pr_err("failed to register tdx_subsys %d\n", err);
return;
--
2.47.1
> +static const struct attribute_group *tdx_subsys_groups[] = {
> + SEAMLDR_GROUP,
> + NULL,
> +};
> +
> static void tdx_subsys_init(void)
> {
> struct tdx_tsm *tdx_tsm;
> int err;
>
> + err = get_seamldr_info();
> + if (err) {
> + pr_err("failed to get seamldr info %d\n", err);
> + return;
> + }
> +
> /* Establish subsystem for global TDX module attributes */
> - err = subsys_virtual_register(&tdx_subsys, NULL);
> + err = subsys_virtual_register(&tdx_subsys, tdx_subsys_groups);
> if (err) {
> pr_err("failed to register tdx_subsys %d\n", err);
> return;
As mentioned, TDX Connect also uses this virtual TSM device. And I tend
to extend it to TDX guest, also make the guest TSM management run on
the virtual device which represents the TDG calls and TDG_VP_VM calls.
So I'm considering extract the common part of tdx_subsys_init() out of
TDX host and into a separate file, e.g.
---
+source "drivers/virt/coco/tdx-tsm/Kconfig"
+
config TSM
bool
diff --git a/drivers/virt/coco/Makefile b/drivers/virt/coco/Makefile
index c0c3733be165..a54d3cb5b4e9 100644
--- a/drivers/virt/coco/Makefile
+++ b/drivers/virt/coco/Makefile
@@ -10,3 +10,4 @@ obj-$(CONFIG_INTEL_TDX_GUEST) += tdx-guest/
obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest/
obj-$(CONFIG_TSM) += tsm-core.o
obj-$(CONFIG_TSM_GUEST) += guest/
+obj-y += tdx-tsm/
diff --git a/drivers/virt/coco/tdx-tsm/Kconfig b/drivers/virt/coco/tdx-tsm/Kconfig
new file mode 100644
index 000000000000..768175f8bb2c
--- /dev/null
+++ b/drivers/virt/coco/tdx-tsm/Kconfig
@@ -0,0 +1,2 @@
+config TDX_TSM_BUS
+ bool
diff --git a/drivers/virt/coco/tdx-tsm/Makefile b/drivers/virt/coco/tdx-tsm/Makefile
new file mode 100644
index 000000000000..09f0ac08988a
--- /dev/null
+++ b/drivers/virt/coco/tdx-tsm/Makefile
@@ -0,0 +1 @@
+obj-$(CONFIG_TDX_TSM_BUS) += tdx-tsm-bus.o
---
And put the tdx_subsys_init() in tdx-tsm-bus.c. We need to move host
specific initializations out of tdx_subsys_init(), e.g. seamldr_group &
seamldr fw upload.
Thanks,
Yilun
Xu Yilun wrote:
> > +static const struct attribute_group *tdx_subsys_groups[] = {
> > + SEAMLDR_GROUP,
> > + NULL,
> > +};
> > +
> > static void tdx_subsys_init(void)
> > {
> > struct tdx_tsm *tdx_tsm;
> > int err;
> >
> > + err = get_seamldr_info();
> > + if (err) {
> > + pr_err("failed to get seamldr info %d\n", err);
> > + return;
> > + }
> > +
> > /* Establish subsystem for global TDX module attributes */
> > - err = subsys_virtual_register(&tdx_subsys, NULL);
> > + err = subsys_virtual_register(&tdx_subsys, tdx_subsys_groups);
> > if (err) {
> > pr_err("failed to register tdx_subsys %d\n", err);
> > return;
>
> As mentioned, TDX Connect also uses this virtual TSM device. And I tend
> to extend it to TDX guest, also make the guest TSM management run on
> the virtual device which represents the TDG calls and TDG_VP_VM calls.
>
> So I'm considering extract the common part of tdx_subsys_init() out of
> TDX host and into a separate file, e.g.
>
> ---
>
> +source "drivers/virt/coco/tdx-tsm/Kconfig"
> +
> config TSM
> bool
> diff --git a/drivers/virt/coco/Makefile b/drivers/virt/coco/Makefile
> index c0c3733be165..a54d3cb5b4e9 100644
> --- a/drivers/virt/coco/Makefile
> +++ b/drivers/virt/coco/Makefile
> @@ -10,3 +10,4 @@ obj-$(CONFIG_INTEL_TDX_GUEST) += tdx-guest/
> obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest/
> obj-$(CONFIG_TSM) += tsm-core.o
> obj-$(CONFIG_TSM_GUEST) += guest/
> +obj-y += tdx-tsm/
> diff --git a/drivers/virt/coco/tdx-tsm/Kconfig b/drivers/virt/coco/tdx-tsm/Kconfig
> new file mode 100644
> index 000000000000..768175f8bb2c
> --- /dev/null
> +++ b/drivers/virt/coco/tdx-tsm/Kconfig
> @@ -0,0 +1,2 @@
> +config TDX_TSM_BUS
> + bool
> diff --git a/drivers/virt/coco/tdx-tsm/Makefile b/drivers/virt/coco/tdx-tsm/Makefile
> new file mode 100644
> index 000000000000..09f0ac08988a
> --- /dev/null
> +++ b/drivers/virt/coco/tdx-tsm/Makefile
> @@ -0,0 +1 @@
> +obj-$(CONFIG_TDX_TSM_BUS) += tdx-tsm-bus.o
Just name it bus.c.
> ---
>
> And put the tdx_subsys_init() in tdx-tsm-bus.c. We need to move host
> specific initializations out of tdx_subsys_init(), e.g. seamldr_group &
> seamldr fw upload.
Just to be clear on the plan here as I think this TD Preserving set
should land before we start upstreamming any TDX Connect bits.
- Create drivers/virt/coco/tdx-tsm/bus.c for registering the tdx_subsys.
The tdx_subsys has sysfs attributes like "version" (host and guest
need this, but have different calls to get at the information) and
"firmware" (only host needs that). So the common code will take sysfs
groups passed as a parameter.
- The "tdx_tsm" device which is unused in this patch set can be
registered on the "tdx" bus to move feature support like TDX Connect
into a typical driver model.
So the change for this set is create a bus.c that is host/guest
agnostic, drop the tdx_tsm device and leave that to the TDX Connect
patches to add back.
The TDX Connect pathes will register the tdx_tsm device near where the
bus is registered for the host and guest cases.
Concerns?
In the meantime, until this set lands in tip we can work out the
organization in tsm.git#staging.
On Thu, Jul 31, 2025 at 02:01:21PM -0700, dan.j.williams@intel.com wrote:
> Xu Yilun wrote:
> > > +static const struct attribute_group *tdx_subsys_groups[] = {
> > > + SEAMLDR_GROUP,
> > > + NULL,
> > > +};
> > > +
> > > static void tdx_subsys_init(void)
> > > {
> > > struct tdx_tsm *tdx_tsm;
> > > int err;
> > >
> > > + err = get_seamldr_info();
> > > + if (err) {
> > > + pr_err("failed to get seamldr info %d\n", err);
> > > + return;
> > > + }
> > > +
> > > /* Establish subsystem for global TDX module attributes */
> > > - err = subsys_virtual_register(&tdx_subsys, NULL);
> > > + err = subsys_virtual_register(&tdx_subsys, tdx_subsys_groups);
> > > if (err) {
> > > pr_err("failed to register tdx_subsys %d\n", err);
> > > return;
> >
> > As mentioned, TDX Connect also uses this virtual TSM device. And I tend
> > to extend it to TDX guest, also make the guest TSM management run on
> > the virtual device which represents the TDG calls and TDG_VP_VM calls.
> >
> > So I'm considering extract the common part of tdx_subsys_init() out of
> > TDX host and into a separate file, e.g.
> >
> > ---
> >
> > +source "drivers/virt/coco/tdx-tsm/Kconfig"
> > +
> > config TSM
> > bool
> > diff --git a/drivers/virt/coco/Makefile b/drivers/virt/coco/Makefile
> > index c0c3733be165..a54d3cb5b4e9 100644
> > --- a/drivers/virt/coco/Makefile
> > +++ b/drivers/virt/coco/Makefile
> > @@ -10,3 +10,4 @@ obj-$(CONFIG_INTEL_TDX_GUEST) += tdx-guest/
> > obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest/
> > obj-$(CONFIG_TSM) += tsm-core.o
> > obj-$(CONFIG_TSM_GUEST) += guest/
> > +obj-y += tdx-tsm/
> > diff --git a/drivers/virt/coco/tdx-tsm/Kconfig b/drivers/virt/coco/tdx-tsm/Kconfig
> > new file mode 100644
> > index 000000000000..768175f8bb2c
> > --- /dev/null
> > +++ b/drivers/virt/coco/tdx-tsm/Kconfig
> > @@ -0,0 +1,2 @@
> > +config TDX_TSM_BUS
> > + bool
> > diff --git a/drivers/virt/coco/tdx-tsm/Makefile b/drivers/virt/coco/tdx-tsm/Makefile
> > new file mode 100644
> > index 000000000000..09f0ac08988a
> > --- /dev/null
> > +++ b/drivers/virt/coco/tdx-tsm/Makefile
> > @@ -0,0 +1 @@
> > +obj-$(CONFIG_TDX_TSM_BUS) += tdx-tsm-bus.o
>
> Just name it bus.c.
I'm about to make the change but I see there is already tdx-guest misc
virtual device in Guest OS:
What: /sys/devices/virtual/misc/tdx_guest/xxxx
And if we add another tdx_subsys, we have:
What: /sys/devices/virtual/tdx/xxxx
Do we really want 2 virtual devices? What's their relationship? I can't
figure out.
So I'm considering reuse the misc/tdx_guest device as a tdx root device
in guest. And that removes the need to have a common tdx tsm bus.
What do you think?
>
> > ---
> >
> > And put the tdx_subsys_init() in tdx-tsm-bus.c. We need to move host
> > specific initializations out of tdx_subsys_init(), e.g. seamldr_group &
> > seamldr fw upload.
>
> Just to be clear on the plan here as I think this TD Preserving set
> should land before we start upstreamming any TDX Connect bits.
>
> - Create drivers/virt/coco/tdx-tsm/bus.c for registering the tdx_subsys.
> The tdx_subsys has sysfs attributes like "version" (host and guest
> need this, but have different calls to get at the information) and
> "firmware" (only host needs that). So the common code will take sysfs
> groups passed as a parameter.
>
> - The "tdx_tsm" device which is unused in this patch set can be
It is used in this patch, Chao creates tdx module 'version' attr on this
device. But I assume you have different opinion: tdx_subsys represents
the whole tdx_module and should have the 'version', and tdx_tsm is a
sub device dedicate for TDX Connect, is it?
Thanks,
Yilun
> registered on the "tdx" bus to move feature support like TDX Connect
> into a typical driver model.
>
> So the change for this set is create a bus.c that is host/guest
> agnostic, drop the tdx_tsm device and leave that to the TDX Connect
> patches to add back.
>
> The TDX Connect pathes will register the tdx_tsm device near where the
> bus is registered for the host and guest cases.
>
> Concerns?
>
> In the meantime, until this set lands in tip we can work out the
> organization in tsm.git#staging.
Xu Yilun wrote: [..] > > > diff --git a/drivers/virt/coco/tdx-tsm/Makefile b/drivers/virt/coco/tdx-tsm/Makefile > > > new file mode 100644 > > > index 000000000000..09f0ac08988a > > > --- /dev/null > > > +++ b/drivers/virt/coco/tdx-tsm/Makefile > > > @@ -0,0 +1 @@ > > > +obj-$(CONFIG_TDX_TSM_BUS) += tdx-tsm-bus.o > > > > Just name it bus.c. > > I'm about to make the change but I see there is already tdx-guest misc > virtual device in Guest OS: > > What: /sys/devices/virtual/misc/tdx_guest/xxxx > > And if we add another tdx_subsys, we have: > > What: /sys/devices/virtual/tdx/xxxx > > Do we really want 2 virtual devices? What's their relationship? I can't > figure out. > > So I'm considering reuse the misc/tdx_guest device as a tdx root device > in guest. And that removes the need to have a common tdx tsm bus. > > What do you think? True, do not need tdx_subsys on the guest side. The tdx_guest driver is sufficient. This was the approach taken with the RTMR enabling, just append the sysfs attributes to the existing guest device. > > > And put the tdx_subsys_init() in tdx-tsm-bus.c. We need to move host > > > specific initializations out of tdx_subsys_init(), e.g. seamldr_group & > > > seamldr fw upload. > > > > Just to be clear on the plan here as I think this TD Preserving set > > should land before we start upstreamming any TDX Connect bits. > > > > - Create drivers/virt/coco/tdx-tsm/bus.c for registering the tdx_subsys. > > The tdx_subsys has sysfs attributes like "version" (host and guest > > need this, but have different calls to get at the information) and > > "firmware" (only host needs that). So the common code will take sysfs > > groups passed as a parameter. > > > > - The "tdx_tsm" device which is unused in this patch set can be > > It is used in this patch, Chao creates tdx module 'version' attr on this > device. But I assume you have different opinion: tdx_subsys represents > the whole tdx_module and should have the 'version', and tdx_tsm is a > sub device dedicate for TDX Connect, is it? The main reason for a tdx_tsm device in addition to the subsys is to allow for deferred attachment. Now, that said, the faux_device infrastructure has arrived since this all started and *could* replace tdx_subsys. The only concern is whether the tdx_tsm driver ever needs to do probe deferral to wait for IOMMU or PCI initialization to happen first. If probe deferral is needed that requires a bus, if probe can always be synchronous with TDX module init then faux_device could work.
> > > - Create drivers/virt/coco/tdx-tsm/bus.c for registering the tdx_subsys.
> > > The tdx_subsys has sysfs attributes like "version" (host and guest
> > > need this, but have different calls to get at the information) and
> > > "firmware" (only host needs that). So the common code will take sysfs
> > > groups passed as a parameter.
> > >
> > > - The "tdx_tsm" device which is unused in this patch set can be
> >
> > It is used in this patch, Chao creates tdx module 'version' attr on this
> > device. But I assume you have different opinion: tdx_subsys represents
> > the whole tdx_module and should have the 'version', and tdx_tsm is a
> > sub device dedicate for TDX Connect, is it?
>
> The main reason for a tdx_tsm device in addition to the subsys is to
> allow for deferred attachment.
I've found another reason, to dynamic control tdx tsm's lifecycle.
tdx_tsm driver uses seamcalls so its functionality relies on tdx module
initialization & vmxon. The former is a one way path but vmxon can be
dynamic off by KVM. vmxoff is fatal to tdx_tsm driver especially on some
can-not-fail destroy path.
So my idea is to remove tdx_tsm device (thus disables tdx_tsm driver) on
vmxoff.
KVM TDX core TDX TSM driver
-----------------------------------------------------
tdx_disable()
tdx_tsm dev del
driver.remove()
vmxoff()
An alternative is to move vmxon/off management out of KVM, that requires
a lot of complex work IMHO, Chao & I both prefer not to touch it.
That said, we still want to "deal with bus/driver binding logic" so faux
is not a good fit.
>
> Now, that said, the faux_device infrastructure has arrived since this
> all started and *could* replace tdx_subsys. The only concern is whether
> the tdx_tsm driver ever needs to do probe deferral to wait for IOMMU or
> PCI initialization to happen first.
The tdx_tsm driver needs to wait for IOMMU/PCI initialization...
>
> If probe deferral is needed that requires a bus, if probe can always be
> synchronous with TDX module init then faux_device could work.
... but doesn't see need for TDX Module early init now. Again TDX Module
init requires vmxon, so it can't be earlier than KVM init, nor the
IOMMU/PCI init. So probe synchronous with TDX module init should be OK.
But considering the tdx tsm's lifecycle concern, I still don't prefer
faux.
Thanks,
Yilun
Xu Yilun wrote:
> > > > - Create drivers/virt/coco/tdx-tsm/bus.c for registering the tdx_subsys.
> > > > The tdx_subsys has sysfs attributes like "version" (host and guest
> > > > need this, but have different calls to get at the information) and
> > > > "firmware" (only host needs that). So the common code will take sysfs
> > > > groups passed as a parameter.
> > > >
> > > > - The "tdx_tsm" device which is unused in this patch set can be
> > >
> > > It is used in this patch, Chao creates tdx module 'version' attr on this
> > > device. But I assume you have different opinion: tdx_subsys represents
> > > the whole tdx_module and should have the 'version', and tdx_tsm is a
> > > sub device dedicate for TDX Connect, is it?
> >
> > The main reason for a tdx_tsm device in addition to the subsys is to
> > allow for deferred attachment.
>
> I've found another reason, to dynamic control tdx tsm's lifecycle.
> tdx_tsm driver uses seamcalls so its functionality relies on tdx module
> initialization & vmxon. The former is a one way path but vmxon can be
> dynamic off by KVM. vmxoff is fatal to tdx_tsm driver especially on some
> can-not-fail destroy path.
>
> So my idea is to remove tdx_tsm device (thus disables tdx_tsm driver) on
> vmxoff.
>
> KVM TDX core TDX TSM driver
> -----------------------------------------------------
> tdx_disable()
> tdx_tsm dev del
> driver.remove()
> vmxoff()
>
> An alternative is to move vmxon/off management out of KVM, that requires
> a lot of complex work IMHO, Chao & I both prefer not to touch it.
It is fine to require that vmxon/off management remain within KVM, and
tie the lifetime of the device to the lifetime of the kvm_intel module*.
However, I think it is too violent to add/remove the device on async
vmxon/vmxoff.
Are there more sources of async vmxoff besides CPU offline, system
suspend, or system shutdown?
The suspend and shutdown cases can be handled with suspend and shutdown
callbacks in the tdx_tsm driver. Those will be called before KVM's
vmxoff. For CPU offline, is it safe to assume that the driver will not
be invoked from those CPUs?
Are there other sources of vmxoff?
> That said, we still want to "deal with bus/driver binding logic" so faux
> is not a good fit.
Faux device gives you a bus / driver-binding flow, it just expects that
the driver is always ready to bind immediately upon device create.
> > Now, that said, the faux_device infrastructure has arrived since this
> > all started and *could* replace tdx_subsys. The only concern is whether
> > the tdx_tsm driver ever needs to do probe deferral to wait for IOMMU or
> > PCI initialization to happen first.
>
> The tdx_tsm driver needs to wait for IOMMU/PCI initialization...
Intel IOMMU can not be modular and arrives at rootfs_initcall(). PCI
arrives at subsys_initcall(). The earliest that KVM arrives is
late_initcall() when it is built-in.
Hmm, so faux_device could work, all dependencies are resolved before the
device is created.
> > If probe deferral is needed that requires a bus, if probe can always be
> > synchronous with TDX module init then faux_device could work.
>
> ... but doesn't see need for TDX Module early init now. Again TDX Module
> init requires vmxon, so it can't be earlier than KVM init, nor the
> IOMMU/PCI init. So probe synchronous with TDX module init should be OK.
>
> But considering the tdx tsm's lifecycle concern, I still don't prefer
> faux.
If there are other sources of async vmxoff that are not handled by
'suspend' and 'shutdown' handlers in the tdx_tsm driver, then perhaps a
flag that gets toggled to fail requests. Otherwise it feels like the
tdx_tsm device should only end life at vt_exit() / tdx_cleanup().
> Thanks,
> Yilun
* It would be unfortunate if userspace needed to manually probe for TDX
Connect when KVM is not built-in. We might add a simple module that
requests kvm_intel in that case:
static const struct x86_cpu_id tdx_connect_autoprobe_ids[] = {
X86_MATCH_FEATURE(X86_FEATURE_TDX_HOST_PLATFORM, NULL),
{}
};
MODULE_DEVICE_TABLE(x86cpu, tdx_connect_autoprobe_ids);
...to allow for userspace to have dependencies on TDX Connect services
arriving automatically without needing to manually demand load
kvm_intel. That module would just immediately exit if TDX Connect
capability is not found.
On Mon, Aug 04, 2025, dan.j.williams@intel.com wrote: > Xu Yilun wrote: > > So my idea is to remove tdx_tsm device (thus disables tdx_tsm driver) on > > vmxoff. > > > > KVM TDX core TDX TSM driver > > ----------------------------------------------------- > > tdx_disable() > > tdx_tsm dev del > > driver.remove() > > vmxoff() > > > > An alternative is to move vmxon/off management out of KVM, that requires > > a lot of complex work IMHO, Chao & I both prefer not to touch it. Eh, it's complex, but not _that_ complex. > It is fine to require that vmxon/off management remain within KVM, and > tie the lifetime of the device to the lifetime of the kvm_intel module*. Nah, let's do this right. Speaking from experience; horrible, make-your-eyes-bleed experience; playing games with kvm-intel.ko to try to get and keep CPUs post-VMXON will end in tears. And it's not just TDX-feature-of-the-day that needs VMXON to be handled outside of KVM, I'd also like to do so to allow out-of-tree hypervisors to do the "right thing"[*]. Not because I care deeply about out-of-tree hypervisors, but because the lack of proper infrastructure for utilizing virtualization hardware irks me. The basic gist is to extract system-wide resources out of KVM and into a separate module, so that e.g. tdx_tsm or whatever can take a dependency on _that_ module and elevate refcounts as needed. All things considered, there aren't so many system-wide resources that it's an insurmountable task. I can provide some rough patches to kickstart things. It'll probably take me a few weeks to extract them from an old internal branch, and I can't promise they'll compile. But they should be good enough to serve as an RFC. https://lore.kernel.org/all/ZwQjUSOle6sWARsr@google.com > * It would be unfortunate if userspace needed to manually probe for TDX > Connect when KVM is not built-in. We might add a simple module that > requests kvm_intel in that case: Oh hell no :-) We have internal code that "requests" vendor module, and it might just be my least favorite thing. Juggling the locks and module lifetimes is just /shudder.
Sean Christopherson wrote: > On Mon, Aug 04, 2025, dan.j.williams@intel.com wrote: > > Xu Yilun wrote: > > > So my idea is to remove tdx_tsm device (thus disables tdx_tsm driver) on > > > vmxoff. > > > > > > KVM TDX core TDX TSM driver > > > ----------------------------------------------------- > > > tdx_disable() > > > tdx_tsm dev del > > > driver.remove() > > > vmxoff() > > > > > > An alternative is to move vmxon/off management out of KVM, that requires > > > a lot of complex work IMHO, Chao & I both prefer not to touch it. > > Eh, it's complex, but not _that_ complex. > > > It is fine to require that vmxon/off management remain within KVM, and > > tie the lifetime of the device to the lifetime of the kvm_intel module*. > > Nah, let's do this right. Speaking from experience; horrible, make-your-eyes-bleed > experience; playing games with kvm-intel.ko to try to get and keep CPUs post-VMXON > will end in tears. > > And it's not just TDX-feature-of-the-day that needs VMXON to be handled outside > of KVM, I'd also like to do so to allow out-of-tree hypervisors to do the "right > thing"[*]. Not because I care deeply about out-of-tree hypervisors, but because > the lack of proper infrastructure for utilizing virtualization hardware irks me. > > The basic gist is to extract system-wide resources out of KVM and into a separate > module, so that e.g. tdx_tsm or whatever can take a dependency on _that_ module > and elevate refcounts as needed. All things considered, there aren't so many > system-wide resources that it's an insurmountable task. > > I can provide some rough patches to kickstart things. It'll probably take me a > few weeks to extract them from an old internal branch, and I can't promise they'll > compile. But they should be good enough to serve as an RFC. > > https://lore.kernel.org/all/ZwQjUSOle6sWARsr@google.com Sounds reasonable to me. Not clear on how it impacts tdx_tsm implementation. The lifetime of this tdx_tsm device can still be bound by tdx_enable() / tdx_cleanup(). The refactor removes the need for the autoprobe hack below. It may also preclude async vmxoff cases by pinning? Or does pinning still not solve the reasons for bouncing vmx on suspend/shutdown? > > * It would be unfortunate if userspace needed to manually probe for TDX > > Connect when KVM is not built-in. We might add a simple module that > > requests kvm_intel in that case: > > Oh hell no :-) > > We have internal code that "requests" vendor module, and it might just be my least > favorite thing. Juggling the locks and module lifetimes is just /shudder. Oh, indeed, if there were locks and lifetime entanglements with kvm_intel involved then it would indeed be a mess. Effectively this was just looking for somewhere to drop a MODULE_SOFTDEP() since there is no good way to autoload "TEE I/O" for TDX. However, that indeed gets dropped / simpler if all of TDX's system-wide bits can just autoprobe and light up features without needing to load all of kvm.
On Mon, Aug 04, 2025, dan.j.williams@intel.com wrote: > Sean Christopherson wrote: > > On Mon, Aug 04, 2025, dan.j.williams@intel.com wrote: > > > Xu Yilun wrote: > > > > So my idea is to remove tdx_tsm device (thus disables tdx_tsm driver) on > > > > vmxoff. > > > > > > > > KVM TDX core TDX TSM driver > > > > ----------------------------------------------------- > > > > tdx_disable() > > > > tdx_tsm dev del > > > > driver.remove() > > > > vmxoff() > > > > > > > > An alternative is to move vmxon/off management out of KVM, that requires > > > > a lot of complex work IMHO, Chao & I both prefer not to touch it. > > > > Eh, it's complex, but not _that_ complex. > > > > > It is fine to require that vmxon/off management remain within KVM, and > > > tie the lifetime of the device to the lifetime of the kvm_intel module*. > > > > Nah, let's do this right. Speaking from experience; horrible, make-your-eyes-bleed > > experience; playing games with kvm-intel.ko to try to get and keep CPUs post-VMXON > > will end in tears. > > > > And it's not just TDX-feature-of-the-day that needs VMXON to be handled outside > > of KVM, I'd also like to do so to allow out-of-tree hypervisors to do the "right > > thing"[*]. Not because I care deeply about out-of-tree hypervisors, but because > > the lack of proper infrastructure for utilizing virtualization hardware irks me. > > > > The basic gist is to extract system-wide resources out of KVM and into a separate > > module, so that e.g. tdx_tsm or whatever can take a dependency on _that_ module > > and elevate refcounts as needed. All things considered, there aren't so many > > system-wide resources that it's an insurmountable task. > > > > I can provide some rough patches to kickstart things. It'll probably take me a > > few weeks to extract them from an old internal branch, and I can't promise they'll > > compile. But they should be good enough to serve as an RFC. > > > > https://lore.kernel.org/all/ZwQjUSOle6sWARsr@google.com > > Sounds reasonable to me. > > Not clear on how it impacts tdx_tsm implementation. The lifetime of this > tdx_tsm device can still be bound by tdx_enable() / tdx_cleanup(). The > refactor removes the need for the autoprobe hack below. It may also > preclude async vmxoff cases by pinning? Or does pinning still not solve > the reasons for bouncing vmx on suspend/shutdown? What exactly is the concern with suspend/shutdown? Suspend should be a non-issue, as userspace tasks need to be frozen before the kernel fires off the suspend notifiers. Ditto for a normal shutdown. Forced shutdown will be asynchronous with respect to running vCPUs, but all bets are off on a forced shutdown. Ditto for disabling VMX via NMI shootdown on a crash.
Sean Christopherson wrote: [..] > > Sounds reasonable to me. > > > > Not clear on how it impacts tdx_tsm implementation. The lifetime of this > > tdx_tsm device can still be bound by tdx_enable() / tdx_cleanup(). The > > refactor removes the need for the autoprobe hack below. It may also > > preclude async vmxoff cases by pinning? Or does pinning still not solve > > the reasons for bouncing vmx on suspend/shutdown? > > What exactly is the concern with suspend/shutdown? I was confused by Yilun's diagram that suggested vmxoff scenarios while kvm_intel is still loaded. > Suspend should be a non-issue, as userspace tasks need to be frozen before the > kernel fires off the suspend notifiers. Ditto for a normal shutdown. Yes, tdx_tsm can stay registered over those events. > Forced shutdown will be asynchronous with respect to running vCPUs, but all bets > are off on a forced shutdown. Ditto for disabling VMX via NMI shootdown on a > crash. Ok, to repeat back the implications: async vmxoff is not something that needs to gracefully shutdown the tdx_tsm device or system-wide TDX services. Those are already going to error out in the force shutdown case. tdx_tsm is a module for system-wide tdx_tsm services. Its lifetime starts at tdx_enable() and ends at tdx_cleanup(). Until a refactor completes tdx_enable() is called from the kvm_intel init path. Post refactor, tdx_enable() is in a system-wide TDX services module that depends on a shared module, not kvm_intel, to manage vmxon. kvm_intel is a peer client of this shared vmx module. While the TDX TEE I/O (device security) RFC is in flight the implementation will go through a phase of userspace needing to demand-load kvm_intel. The final implementation for mainline will have broken tdx_tsm's dependency on kvm_intel.
On Mon, Aug 04, 2025 at 09:02:51PM -0700, dan.j.williams@intel.com wrote: > Sean Christopherson wrote: > > On Mon, Aug 04, 2025, dan.j.williams@intel.com wrote: > > > Xu Yilun wrote: > > > > So my idea is to remove tdx_tsm device (thus disables tdx_tsm driver) on > > > > vmxoff. > > > > > > > > KVM TDX core TDX TSM driver > > > > ----------------------------------------------------- > > > > tdx_disable() > > > > tdx_tsm dev del > > > > driver.remove() > > > > vmxoff() > > > > > > > > An alternative is to move vmxon/off management out of KVM, that requires > > > > a lot of complex work IMHO, Chao & I both prefer not to touch it. > > > > Eh, it's complex, but not _that_ complex. > > > > > It is fine to require that vmxon/off management remain within KVM, and > > > tie the lifetime of the device to the lifetime of the kvm_intel module*. > > > > Nah, let's do this right. Speaking from experience; horrible, make-your-eyes-bleed > > experience; playing games with kvm-intel.ko to try to get and keep CPUs post-VMXON > > will end in tears. > > > > And it's not just TDX-feature-of-the-day that needs VMXON to be handled outside > > of KVM, I'd also like to do so to allow out-of-tree hypervisors to do the "right > > thing"[*]. Not because I care deeply about out-of-tree hypervisors, but because > > the lack of proper infrastructure for utilizing virtualization hardware irks me. > > > > The basic gist is to extract system-wide resources out of KVM and into a separate > > module, so that e.g. tdx_tsm or whatever can take a dependency on _that_ module > > and elevate refcounts as needed. All things considered, there aren't so many > > system-wide resources that it's an insurmountable task. > > > > I can provide some rough patches to kickstart things. It'll probably take me a > > few weeks to extract them from an old internal branch, and I can't promise they'll > > compile. But they should be good enough to serve as an RFC. > > > > https://lore.kernel.org/all/ZwQjUSOle6sWARsr@google.com > > Sounds reasonable to me. > > Not clear on how it impacts tdx_tsm implementation. The lifetime of this > tdx_tsm device can still be bound by tdx_enable() / tdx_cleanup(). The I assume with VMXON outside of KVM, tdx tsm driver could actively call tdx_bringup(), which includes VMXON, tdx_enable() and cpuhp handling. I.e, tdx_tsm device lifetime won't have to be bound to any other component, it could keep living until tdx_tsm module ends. > refactor removes the need for the autoprobe hack below. It may also > preclude async vmxoff cases by pinning? Or does pinning still not solve not by pinning, by cpuhp handling async vmxoff won't affect seamcall execution. > the reasons for bouncing vmx on suspend/shutdown? Thanks, Yilun
>As mentioned, TDX Connect also uses this virtual TSM device. And I tend >to extend it to TDX guest, also make the guest TSM management run on >the virtual device which represents the TDG calls and TDG_VP_VM calls. > >So I'm considering extract the common part of tdx_subsys_init() out of >TDX host and into a separate file, e.g. > >--- > >+source "drivers/virt/coco/tdx-tsm/Kconfig" >+ > config TSM > bool >diff --git a/drivers/virt/coco/Makefile b/drivers/virt/coco/Makefile >index c0c3733be165..a54d3cb5b4e9 100644 >--- a/drivers/virt/coco/Makefile >+++ b/drivers/virt/coco/Makefile >@@ -10,3 +10,4 @@ obj-$(CONFIG_INTEL_TDX_GUEST) += tdx-guest/ > obj-$(CONFIG_ARM_CCA_GUEST) += arm-cca-guest/ > obj-$(CONFIG_TSM) += tsm-core.o > obj-$(CONFIG_TSM_GUEST) += guest/ >+obj-y += tdx-tsm/ >diff --git a/drivers/virt/coco/tdx-tsm/Kconfig b/drivers/virt/coco/tdx-tsm/Kconfig >new file mode 100644 >index 000000000000..768175f8bb2c >--- /dev/null >+++ b/drivers/virt/coco/tdx-tsm/Kconfig >@@ -0,0 +1,2 @@ >+config TDX_TSM_BUS >+ bool >diff --git a/drivers/virt/coco/tdx-tsm/Makefile b/drivers/virt/coco/tdx-tsm/Makefile >new file mode 100644 >index 000000000000..09f0ac08988a >--- /dev/null >+++ b/drivers/virt/coco/tdx-tsm/Makefile >@@ -0,0 +1 @@ >+obj-$(CONFIG_TDX_TSM_BUS) += tdx-tsm-bus.o > >--- > >And put the tdx_subsys_init() in tdx-tsm-bus.c. We need to move host >specific initializations out of tdx_subsys_init(), e.g. seamldr_group & >seamldr fw upload. Sounds good. I assume you'll update the TDX TSM framework patch* directly. Please share the updated patch once it's ready, and I'll take care of all the seamldr stuff. [*]: https://lore.kernel.org/kvm/20250523095322.88774-5-chao.gao@intel.com/
© 2016 - 2025 Red Hat, Inc.