KVM_TDX_INIT_VM needs to be called after KVM_CREATE_VM and before
creating any VCPUs, thus before KVM_SET_CPUID2. KVM_TDX_INIT_VM accepts
the CPUID values directly.
Since KVM_GET_CPUID2 can't be used at this point, calculate the CPUID
values manually by using kvm_get_supported_cpuid() and filter the
returned CPUIDs against the supported CPUID values read from the TDX
module.
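
For illustration, the resulting ordering in a test is roughly as below
(sketch only; vm_create_tdx() and td_vcpu_add() are hypothetical
stand-ins for the helpers used elsewhere in the series):

	struct kvm_vm *vm;
	struct kvm_vcpu *vcpu;

	vm = vm_create_tdx();			/* KVM_CREATE_VM */
	vm_tdx_init_vm(vm, attributes);		/* KVM_TDX_INIT_VM: CPUID goes in here */
	vcpu = td_vcpu_add(vm, 0, guest_code);	/* vCPUs only after INIT_VM */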
Co-developed-by: Isaku Yamahata <isaku.yamahata@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
Co-developed-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Rick Edgecombe <rick.p.edgecombe@intel.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
---
.../selftests/kvm/include/x86/tdx/tdx_util.h | 54 +++++++
.../selftests/kvm/lib/x86/tdx/tdx_util.c | 132 ++++++++++++++++++
2 files changed, 186 insertions(+)
diff --git a/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
index dafdc7e46abe..a2509959c7ce 100644
--- a/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
+++ b/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
@@ -11,6 +11,60 @@ static inline bool is_tdx_vm(struct kvm_vm *vm)
return vm->type == KVM_X86_TDX_VM;
}
+/*
+ * TDX ioctls
+ */
+
+#define __vm_tdx_vm_ioctl(vm, cmd, metadata, arg) \
+({ \
+ int r; \
+ \
+ union { \
+ struct kvm_tdx_cmd c; \
+ unsigned long raw; \
+ } tdx_cmd = { .c = { \
+ .id = (cmd), \
+ .flags = (uint32_t)(metadata), \
+ .data = (uint64_t)(arg), \
+ } }; \
+ \
+ r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd.raw); \
+ r ?: tdx_cmd.c.hw_error; \
+})
+
+#define vm_tdx_vm_ioctl(vm, cmd, flags, arg) \
+({ \
+ int ret = __vm_tdx_vm_ioctl(vm, cmd, flags, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
+})
+
+#define __vm_tdx_vcpu_ioctl(vcpu, cmd, metadata, arg) \
+({ \
+ int r; \
+ \
+ union { \
+ struct kvm_tdx_cmd c; \
+ unsigned long raw; \
+ } tdx_cmd = { .c = { \
+ .id = (cmd), \
+ .flags = (uint32_t)(metadata), \
+ .data = (uint64_t)(arg), \
+ } }; \
+ \
+ r = __vcpu_ioctl(vcpu, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd.raw); \
+ r ?: tdx_cmd.c.hw_error; \
+})
+
+#define vm_tdx_vcpu_ioctl(vcpu, cmd, flags, arg) \
+({ \
+ int ret = __vm_tdx_vcpu_ioctl(vcpu, cmd, flags, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
+})
+
+void vm_tdx_init_vm(struct kvm_vm *vm, uint64_t attributes);
+
void vm_tdx_setup_boot_code_region(struct kvm_vm *vm);
void vm_tdx_setup_boot_parameters_region(struct kvm_vm *vm, uint32_t nr_runnable_vcpus);
void vm_tdx_load_common_boot_parameters(struct kvm_vm *vm);
diff --git a/tools/testing/selftests/kvm/lib/x86/tdx/tdx_util.c b/tools/testing/selftests/kvm/lib/x86/tdx/tdx_util.c
index f3b69923e928..7a622b4810b1 100644
--- a/tools/testing/selftests/kvm/lib/x86/tdx/tdx_util.c
+++ b/tools/testing/selftests/kvm/lib/x86/tdx/tdx_util.c
@@ -124,3 +124,135 @@ void vm_tdx_set_vcpu_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
vcpu_params->guest_code = (uint64_t)guest_code;
}
+
+static struct kvm_tdx_capabilities *tdx_read_capabilities(struct kvm_vm *vm)
+{
+ struct kvm_tdx_capabilities *tdx_cap = NULL;
+ int nr_cpuid_configs = 4;
+ int rc = -1;
+ int i;
+
+ do {
+ nr_cpuid_configs *= 2;
+
+ tdx_cap = realloc(tdx_cap, sizeof(*tdx_cap) +
+ sizeof(tdx_cap->cpuid) +
+ (sizeof(struct kvm_cpuid_entry2) * nr_cpuid_configs));
+ TEST_ASSERT(tdx_cap,
+ "Could not allocate memory for tdx capability nr_cpuid_configs %d\n",
+ nr_cpuid_configs);
+
+ tdx_cap->cpuid.nent = nr_cpuid_configs;
+ rc = __vm_tdx_vm_ioctl(vm, KVM_TDX_CAPABILITIES, 0, tdx_cap);
+ } while (rc < 0 && errno == E2BIG);
+
+ TEST_ASSERT(rc == 0, "KVM_TDX_CAPABILITIES failed: %d %d",
+ rc, errno);
+
+ pr_debug("tdx_cap: supported_attrs: 0x%016llx\n"
+ "tdx_cap: supported_xfam 0x%016llx\n",
+ tdx_cap->supported_attrs, tdx_cap->supported_xfam);
+
+ for (i = 0; i < tdx_cap->cpuid.nent; i++) {
+ const struct kvm_cpuid_entry2 *config = &tdx_cap->cpuid.entries[i];
+
+ pr_debug("cpuid config[%d]: leaf 0x%x sub_leaf 0x%x eax 0x%08x ebx 0x%08x ecx 0x%08x edx 0x%08x\n",
+ i, config->function, config->index,
+ config->eax, config->ebx, config->ecx, config->edx);
+ }
+
+ return tdx_cap;
+}
+
+static struct kvm_cpuid_entry2 *tdx_find_cpuid_config(struct kvm_tdx_capabilities *cap,
+ uint32_t leaf, uint32_t sub_leaf)
+{
+ struct kvm_cpuid_entry2 *config;
+ uint32_t i;
+
+ for (i = 0; i < cap->cpuid.nent; i++) {
+ config = &cap->cpuid.entries[i];
+
+ if (config->function == leaf && config->index == sub_leaf)
+ return config;
+ }
+
+ return NULL;
+}
+
+/*
+ * Filter CPUID based on TDX supported capabilities
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *   cpuid_data - CPUID fields to filter
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * For each CPUID leaf, filter out unsupported bits based on the capabilities reported
+ * by the TDX module.
+ */
+static void vm_tdx_filter_cpuid(struct kvm_vm *vm,
+ struct kvm_cpuid2 *cpuid_data)
+{
+ struct kvm_tdx_capabilities *tdx_cap;
+ struct kvm_cpuid_entry2 *config;
+ struct kvm_cpuid_entry2 *e;
+ int i;
+
+ tdx_cap = tdx_read_capabilities(vm);
+
+ i = 0;
+ while (i < cpuid_data->nent) {
+ e = cpuid_data->entries + i;
+ config = tdx_find_cpuid_config(tdx_cap, e->function, e->index);
+
+ if (!config) {
+ int left = cpuid_data->nent - i - 1;
+
+ if (left > 0)
+ memmove(cpuid_data->entries + i,
+ cpuid_data->entries + i + 1,
+ sizeof(*cpuid_data->entries) * left);
+ cpuid_data->nent--;
+ continue;
+ }
+
+ e->eax &= config->eax;
+ e->ebx &= config->ebx;
+ e->ecx &= config->ecx;
+ e->edx &= config->edx;
+
+ i++;
+ }
+
+ free(tdx_cap);
+}
+
+void vm_tdx_init_vm(struct kvm_vm *vm, uint64_t attributes)
+{
+ struct kvm_tdx_init_vm *init_vm;
+ const struct kvm_cpuid2 *tmp;
+ struct kvm_cpuid2 *cpuid;
+
+ tmp = kvm_get_supported_cpuid();
+
+ cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ memcpy(cpuid, tmp, kvm_cpuid2_size(tmp->nent));
+ vm_tdx_filter_cpuid(vm, cpuid);
+
+ init_vm = calloc(1, sizeof(*init_vm) +
+ sizeof(init_vm->cpuid.entries[0]) * cpuid->nent);
+ TEST_ASSERT(init_vm, "init_vm allocation failed");
+
+ memcpy(&init_vm->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+ free(cpuid);
+
+ init_vm->attributes = attributes;
+
+ vm_tdx_vm_ioctl(vm, KVM_TDX_INIT_VM, 0, init_vm);
+
+ free(init_vm);
+}
--
2.51.1.851.g4ebd6896fd-goog
Hi Sagi,
On 10/28/25 2:20 PM, Sagi Shahar wrote:
> diff --git a/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
> index dafdc7e46abe..a2509959c7ce 100644
> --- a/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
> +++ b/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
> @@ -11,6 +11,60 @@ static inline bool is_tdx_vm(struct kvm_vm *vm)
> return vm->type == KVM_X86_TDX_VM;
> }
>
> +/*
> + * TDX ioctls
> + */
> +
> +#define __vm_tdx_vm_ioctl(vm, cmd, metadata, arg) \
> +({ \
> + int r; \
> + \
> + union { \
> + struct kvm_tdx_cmd c; \
> + unsigned long raw; \
> + } tdx_cmd = { .c = { \
> + .id = (cmd), \
> + .flags = (uint32_t)(metadata), \
> + .data = (uint64_t)(arg), \
> + } }; \
> + \
> + r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd.raw); \
> + r ?: tdx_cmd.c.hw_error; \
> +})
> +
> +#define vm_tdx_vm_ioctl(vm, cmd, flags, arg) \
> +({ \
> + int ret = __vm_tdx_vm_ioctl(vm, cmd, flags, arg); \
> + \
> + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
> +})
> +
> +#define __vm_tdx_vcpu_ioctl(vcpu, cmd, metadata, arg) \
> +({ \
> + int r; \
> + \
> + union { \
> + struct kvm_tdx_cmd c; \
> + unsigned long raw; \
> + } tdx_cmd = { .c = { \
> + .id = (cmd), \
> + .flags = (uint32_t)(metadata), \
> + .data = (uint64_t)(arg), \
> + } }; \
> + \
> + r = __vcpu_ioctl(vcpu, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd.raw); \
> + r ?: tdx_cmd.c.hw_error; \
> +})
> +
> +#define vm_tdx_vcpu_ioctl(vcpu, cmd, flags, arg) \
> +({ \
> + int ret = __vm_tdx_vcpu_ioctl(vcpu, cmd, flags, arg); \
> + \
> + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
> +})
> +
> +void vm_tdx_init_vm(struct kvm_vm *vm, uint64_t attributes);
> +
> void vm_tdx_setup_boot_code_region(struct kvm_vm *vm);
> void vm_tdx_setup_boot_parameters_region(struct kvm_vm *vm, uint32_t nr_runnable_vcpus);
> void vm_tdx_load_common_boot_parameters(struct kvm_vm *vm);
For completeness, to help with the discussion below, other patches add:
void vm_tdx_load_vcpu_boot_parameters(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
void vm_tdx_set_vcpu_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
void vm_tdx_finalize(struct kvm_vm *vm);
When considering the TDX functions in tdx_util.h visible above, the namespace of
TDX-related functions is not clear to me. I believe an intuitive namespace
makes the code easier to understand and build upon.
Almost all tdx_util.h functions appear to have the "vm_tdx" prefix even when they operate at vCPU scope,
for example:
void vm_tdx_set_vcpu_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
and
vm_tdx_vcpu_ioctl()
Also, when operating on a VM, an extra "vm" may be added, creating a function like
vm_tdx_vm_ioctl() with "vm" twice in its name.
Compare with similar functions for normal VMs:
vm_ioctl() -> vm_tdx_vm_ioctl()
vcpu_ioctl() -> vm_tdx_vcpu_ioctl()
Could it not perhaps instead be:
vm_ioctl() -> tdx_vm_ioctl()
vcpu_ioctl() -> tdx_vcpu_ioctl()
The functions could still have "vm"/"vcpu" in their name to designate the scope, for example:
void tdx_vm_setup_boot_code_region(struct kvm_vm *vm);
void tdx_vm_setup_boot_parameters_region(struct kvm_vm *vm, uint32_t nr_runnable_vcpus);
void tdx_vm_load_common_boot_parameters(struct kvm_vm *vm);
void tdx_vcpu_load_boot_parameters(struct kvm_vm *vm, struct kvm_vcpu *vcpu);
void tdx_vcpu_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
void tdx_vm_finalize(struct kvm_vm *vm);
With a namespace like above it is clear that (a) it is a TDX call and (b) what the scope of the
call is. This helps to understand what the code does while reading it and makes clear how to
name new functions when adding new features.
...
> +/*
> + * Filter CPUID based on TDX supported capabilities
> + *
> + * Input Args:
> + * vm - Virtual Machine
> + * cpuid_data - CPUID fileds to filter
fileds -> fields?
> + *
> + * Output Args: None
> + *
> + * Return: None
> + *
> + * For each CPUID leaf, filter out non-supported bits based on the capabilities reported
> + * by the TDX module
> + */
> +static void vm_tdx_filter_cpuid(struct kvm_vm *vm,
> + struct kvm_cpuid2 *cpuid_data)
> +{
> + struct kvm_tdx_capabilities *tdx_cap;
> + struct kvm_cpuid_entry2 *config;
> + struct kvm_cpuid_entry2 *e;
> + int i;
> +
> + tdx_cap = tdx_read_capabilities(vm);
> +
> + i = 0;
> + while (i < cpuid_data->nent) {
> + e = cpuid_data->entries + i;
> + config = tdx_find_cpuid_config(tdx_cap, e->function, e->index);
> +
> + if (!config) {
> + int left = cpuid_data->nent - i - 1;
> +
> + if (left > 0)
> + memmove(cpuid_data->entries + i,
> + cpuid_data->entries + i + 1,
> + sizeof(*cpuid_data->entries) * left);
> + cpuid_data->nent--;
> + continue;
> + }
> +
> + e->eax &= config->eax;
> + e->ebx &= config->ebx;
> + e->ecx &= config->ecx;
> + e->edx &= config->edx;
> +
> + i++;
> + }
> +
> + free(tdx_cap);
> +}
> +
> +void vm_tdx_init_vm(struct kvm_vm *vm, uint64_t attributes)
> +{
> + struct kvm_tdx_init_vm *init_vm;
> + const struct kvm_cpuid2 *tmp;
> + struct kvm_cpuid2 *cpuid;
> +
> + tmp = kvm_get_supported_cpuid();
> +
> + cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
Could this allocation be limited to tmp->nent?
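(i.e. something like:

	cpuid = allocate_kvm_cpuid2(tmp->nent);

since only tmp->nent entries are copied in below, and the filtering
only ever shrinks the list.)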
> + memcpy(cpuid, tmp, kvm_cpuid2_size(tmp->nent));
> + vm_tdx_filter_cpuid(vm, cpuid);
> +
> + init_vm = calloc(1, sizeof(*init_vm) +
> + sizeof(init_vm->cpuid.entries[0]) * cpuid->nent);
> + TEST_ASSERT(init_vm, "init_vm allocation failed");
> +
> + memcpy(&init_vm->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
> + free(cpuid);
> +
> + init_vm->attributes = attributes;
> +
> + vm_tdx_vm_ioctl(vm, KVM_TDX_INIT_VM, 0, init_vm);
> +
> + free(init_vm);
> +}
Reinette
On 10/29/2025 5:20 AM, Sagi Shahar wrote:
> KVM_TDX_INIT_VM needs to be called after KVM_CREATE_VM and before
> creating any VCPUs, thus before KVM_SET_CPUID2. KVM_TDX_INIT_VM accepts
> the CPUID values directly.

This sentence seems inaccurate. KVM_TDX_INIT_VM, i.e. the seamcall
TDH.MNG.INIT, allows only directly configurable CPUID bits to be 1.

>
> Since KVM_GET_CPUID2 can't be used at this point,

I don't think this is relevant. As mentioned above, only directly
configurable CPUID bits can be 1, so the CPUID values input to
KVM_TDX_INIT_VM should be filtered against the supported directly
configurable CPUID bits.

> calculate the CPUID
> values manually by using kvm_get_supported_cpuid() and filter the
> returned CPUIDs against the supported CPUID values read from the TDX

supported CPUID -> supported configurable CPUID

> module.
>
> [...]
Sagi Shahar wrote:
[snip]
> diff --git a/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h b/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
> index dafdc7e46abe..a2509959c7ce 100644
> --- a/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
> +++ b/tools/testing/selftests/kvm/include/x86/tdx/tdx_util.h
> @@ -11,6 +11,60 @@ static inline bool is_tdx_vm(struct kvm_vm *vm)
> return vm->type == KVM_X86_TDX_VM;
> }
>
> +/*
> + * TDX ioctls
> + */
> +
> +#define __vm_tdx_vm_ioctl(vm, cmd, metadata, arg) \
NIT: Why not rename 'metadata' -> 'flags'?
> +({ \
> + int r; \
> + \
> + union { \
> + struct kvm_tdx_cmd c; \
> + unsigned long raw; \
> + } tdx_cmd = { .c = { \
> + .id = (cmd), \
> + .flags = (uint32_t)(metadata), \
> + .data = (uint64_t)(arg), \
> + } }; \
> + \
> + r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd.raw); \
> + r ?: tdx_cmd.c.hw_error; \
> +})
I see this is a common pattern for kvm selftests, but I'm struggling to
figure out why this is a macro and not a function call.
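E.g. I would have expected something like the below to work equally well
(hypothetical and untested, reusing the kvm_tdx_cmd setup from the macro):

	static inline int __tdx_vm_ioctl(struct kvm_vm *vm, uint32_t cmd,
					 uint32_t flags, void *data)
	{
		struct kvm_tdx_cmd tdx_cmd = {
			.id = cmd,
			.flags = flags,
			.data = (uint64_t)data,
		};
		int r;

		r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd);
		/* Mirror the macro: ioctl failure, else hardware error */
		return r ?: tdx_cmd.hw_error;
	}

with only the asserting vm_tdx_vm_ioctl() wrapper kept as a macro, since
that is where the #cmd stringification is needed.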
> +
> +#define vm_tdx_vm_ioctl(vm, cmd, flags, arg) \
> +({ \
> + int ret = __vm_tdx_vm_ioctl(vm, cmd, flags, arg); \
> + \
> + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
> +})
> +
> +#define __vm_tdx_vcpu_ioctl(vcpu, cmd, metadata, arg) \
NIT: Why not just call 'metadata' 'flags'?
> +({ \
> + int r; \
> + \
> + union { \
> + struct kvm_tdx_cmd c; \
> + unsigned long raw; \
> + } tdx_cmd = { .c = { \
> + .id = (cmd), \
> + .flags = (uint32_t)(metadata), \
> + .data = (uint64_t)(arg), \
> + } }; \
> + \
> + r = __vcpu_ioctl(vcpu, KVM_MEMORY_ENCRYPT_OP, &tdx_cmd.raw); \
> + r ?: tdx_cmd.c.hw_error; \
> +})
> +
[snip]
> +
> +static struct kvm_tdx_capabilities *tdx_read_capabilities(struct kvm_vm *vm)
> +{
> + struct kvm_tdx_capabilities *tdx_cap = NULL;
> + int nr_cpuid_configs = 4;
> + int rc = -1;
> + int i;
> +
> + do {
> + nr_cpuid_configs *= 2;
> +
> + tdx_cap = realloc(tdx_cap, sizeof(*tdx_cap) +
> + sizeof(tdx_cap->cpuid) +
> + (sizeof(struct kvm_cpuid_entry2) * nr_cpuid_configs));
> + TEST_ASSERT(tdx_cap,
> + "Could not allocate memory for tdx capability nr_cpuid_configs %d\n",
> + nr_cpuid_configs);
> +
> + tdx_cap->cpuid.nent = nr_cpuid_configs;
> + rc = __vm_tdx_vm_ioctl(vm, KVM_TDX_CAPABILITIES, 0, tdx_cap);
Why not use vm_tdx_vm_ioctl()?
Generally though it is good.
Reviewed-by: Ira Weiny <ira.weiny@intel.com>
[snip]
On 10/29/25 2:16 PM, Ira Weiny wrote:
>
>> +
>> +#define vm_tdx_vm_ioctl(vm, cmd, flags, arg) \
>> +({ \
>> + int ret = __vm_tdx_vm_ioctl(vm, cmd, flags, arg); \
>> + \
>> + __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
>> +})
>> +
>> +#define __vm_tdx_vcpu_ioctl(vcpu, cmd, metadata, arg) \
>
> NIT: Why not just call 'metadata', 'flags'?
>
Making this change would make the code easier to read by being consistent
with the caller here as well as with kernel terms. If making this change,
please also consider its callers, for example the "metadata" local variable of
tdx_init_mem_region() introduced in patch #14. Naming it "flags" there would
then be consistent with this change as well as with how the flag is used in the kernel.
Reinette