Invoke KVM_TDX_INIT in kvm_arch_pre_create_vcpu() that KVM_TDX_INIT
configures global TD configurations, e.g. the canonical CPUID config,
and must be executed prior to creating vCPUs.
Use kvm_x86_arch_cpuid() to setup the CPUID settings for TDX VM.
Note, this doesn't address the fact that QEMU may change the CPUID
configuration when creating vCPUs, i.e. punts on refactoring QEMU to
provide a stable CPUID config prior to kvm_arch_init().
Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
Acked-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
---
Changes in v6:
- setup xfam explicitly to fit with new uapi;
- use tdx_caps->cpuid to filter the input of cpuids because now KVM only
allows the leafs that reported via KVM_TDX_GET_CAPABILITIES;
Changes in v4:
- mark init_vm with g_autofree() and use QEMU_LOCK_GUARD() to eliminate
the goto labels; (Daniel)
Changes in v3:
- Pass @errp in tdx_pre_create_vcpu() and pass error info to it. (Daniel)
---
accel/kvm/kvm-all.c | 8 ++++
target/i386/kvm/kvm.c | 15 +++++--
target/i386/kvm/kvm_i386.h | 3 ++
target/i386/kvm/meson.build | 2 +-
target/i386/kvm/tdx-stub.c | 8 ++++
target/i386/kvm/tdx.c | 87 +++++++++++++++++++++++++++++++++++++
target/i386/kvm/tdx.h | 6 +++
7 files changed, 125 insertions(+), 4 deletions(-)
create mode 100644 target/i386/kvm/tdx-stub.c
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 1732fa1adecd..4a1c9950894c 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -536,8 +536,15 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+ /*
+ * tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
+ * kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
+ * dereference.
+ */
+ cpu->kvm_state = s;
ret = kvm_arch_pre_create_vcpu(cpu, errp);
if (ret < 0) {
+ cpu->kvm_state = NULL;
goto err;
}
@@ -546,6 +553,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
error_setg_errno(errp, -ret,
"kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
kvm_arch_vcpu_id(cpu));
+ cpu->kvm_state = NULL;
goto err;
}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index afbf67a7fdaa..db676c1336ab 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -38,6 +38,7 @@
#include "kvm_i386.h"
#include "../confidential-guest.h"
#include "sev.h"
+#include "tdx.h"
#include "xen-emu.h"
#include "hyperv.h"
#include "hyperv-proto.h"
@@ -1824,9 +1825,8 @@ static void kvm_init_nested_state(CPUX86State *env)
}
}
-static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
- struct kvm_cpuid_entry2 *entries,
- uint32_t cpuid_i)
+uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i)
{
uint32_t limit, i, j;
uint32_t unused;
@@ -2358,6 +2358,15 @@ int kvm_arch_init_vcpu(CPUState *cs)
return r;
}
+int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ if (is_tdx_vm()) {
+ return tdx_pre_create_vcpu(cpu, errp);
+ }
+
+ return 0;
+}
+
int kvm_arch_destroy_vcpu(CPUState *cs)
{
X86CPU *cpu = X86_CPU(cs);
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index efb0883bd968..b1baf9e7f910 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -24,6 +24,9 @@
#define kvm_ioapic_in_kernel() \
(kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
+uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
+ uint32_t cpuid_i);
+
#else
#define kvm_pit_in_kernel() 0
diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
index 466bccb9cb17..3f44cdedb758 100644
--- a/target/i386/kvm/meson.build
+++ b/target/i386/kvm/meson.build
@@ -8,7 +8,7 @@ i386_kvm_ss.add(files(
i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
-i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'))
+i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c'))
i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
new file mode 100644
index 000000000000..b614b46d3f4a
--- /dev/null
+++ b/target/i386/kvm/tdx-stub.c
@@ -0,0 +1,8 @@
+#include "qemu/osdep.h"
+
+#include "tdx.h"
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ return -EINVAL;
+}
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index ff3ef9bd8657..1b7894e43c6f 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -137,6 +137,91 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
return KVM_X86_TDX_VM;
}
+static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
+{
+ CPUX86State *env = &x86cpu->env;
+ uint64_t xfam;
+
+ xfam = env->features[FEAT_XSAVE_XCR0_LO] |
+ env->features[FEAT_XSAVE_XCR0_HI] |
+ env->features[FEAT_XSAVE_XSS_LO] |
+ env->features[FEAT_XSAVE_XSS_HI];
+
+ if (xfam & ~tdx_caps->supported_xfam) {
+ error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
+ xfam, tdx_caps->supported_xfam);
+ return -1;
+ }
+
+ tdx_guest->xfam = xfam;
+ return 0;
+}
+
+static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
+{
+ int i, dest_cnt = 0;
+ struct kvm_cpuid_entry2 *src, *dest, *conf;
+
+ for (i = 0; i < cpuids->nent; i++) {
+ src = cpuids->entries + i;
+ conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
+ if (!conf) {
+ continue;
+ }
+ dest = cpuids->entries + dest_cnt;
+
+ dest->function = src->function;
+ dest->index = src->index;
+ dest->flags = src->flags;
+ dest->eax = src->eax & conf->eax;
+ dest->ebx = src->ebx & conf->ebx;
+ dest->ecx = src->ecx & conf->ecx;
+ dest->edx = src->edx & conf->edx;
+
+ dest_cnt++;
+ }
+ cpuids->nent = dest_cnt++;
+}
+
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
+{
+ X86CPU *x86cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86cpu->env;
+ g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
+ int r = 0;
+
+ QEMU_LOCK_GUARD(&tdx_guest->lock);
+ if (tdx_guest->initialized) {
+ return r;
+ }
+
+ init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
+ sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
+
+ r = setup_td_xfam(x86cpu, errp);
+ if (r) {
+ return r;
+ }
+
+ init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
+ tdx_filter_cpuid(&init_vm->cpuid);
+
+ init_vm->attributes = tdx_guest->attributes;
+ init_vm->xfam = tdx_guest->xfam;
+
+ do {
+ r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
+ } while (r == -EAGAIN);
+ if (r < 0) {
+ error_setg_errno(errp, -r, "KVM_TDX_INIT_VM failed");
+ return r;
+ }
+
+ tdx_guest->initialized = true;
+
+ return 0;
+}
+
/* tdx guest */
OBJECT_DEFINE_TYPE_WITH_INTERFACES(TdxGuest,
tdx_guest,
@@ -150,6 +235,8 @@ static void tdx_guest_init(Object *obj)
ConfidentialGuestSupport *cgs = CONFIDENTIAL_GUEST_SUPPORT(obj);
TdxGuest *tdx = TDX_GUEST(obj);
+ qemu_mutex_init(&tdx->lock);
+
cgs->require_guest_memfd = true;
tdx->attributes = 0;
diff --git a/target/i386/kvm/tdx.h b/target/i386/kvm/tdx.h
index bca19c833e18..e077fd7d1653 100644
--- a/target/i386/kvm/tdx.h
+++ b/target/i386/kvm/tdx.h
@@ -17,7 +17,11 @@ typedef struct TdxGuestClass {
typedef struct TdxGuest {
X86ConfidentialGuest parent_obj;
+ QemuMutex lock;
+
+ bool initialized;
uint64_t attributes; /* TD attributes */
+ uint64_t xfam;
} TdxGuest;
#ifdef CONFIG_TDX
@@ -26,4 +30,6 @@ bool is_tdx_vm(void);
#define is_tdx_vm() 0
#endif /* CONFIG_TDX */
+int tdx_pre_create_vcpu(CPUState *cpu, Error **errp);
+
#endif /* QEMU_I386_TDX_H */
--
2.34.1
+Tony
On Tue, 2024-11-05 at 01:23 -0500, Xiaoyao Li wrote:
> +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
> +{
> + X86CPU *x86cpu = X86_CPU(cpu);
> + CPUX86State *env = &x86cpu->env;
> + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
> + int r = 0;
> +
> + QEMU_LOCK_GUARD(&tdx_guest->lock);
> + if (tdx_guest->initialized) {
> + return r;
> + }
> +
> + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
> + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
> +
> + r = setup_td_xfam(x86cpu, errp);
> + if (r) {
> + return r;
> + }
> +
> + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
> + tdx_filter_cpuid(&init_vm->cpuid);
> +
> + init_vm->attributes = tdx_guest->attributes;
> + init_vm->xfam = tdx_guest->xfam;
> +
> + do {
> + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
> + } while (r == -EAGAIN);
KVM_TDX_INIT_VM can also return EBUSY. This should check for it, or KVM should
standardize on one for both conditions. In KVM, both cases handle
TDX_RND_NO_ENTROPY, but one tries to save some of the initialization for the
next attempt. I don't know why userspace would need to differentiate between the
two cases though, which makes me think we should just change the KVM side.
> + if (r < 0) {
> + error_setg_errno(errp, -r, "KVM_TDX_INIT_VM failed");
> + return r;
> + }
> +
> + tdx_guest->initialized = true;
> +
> + return 0;
> +}
On 11/6/2024 4:51 AM, Edgecombe, Rick P wrote:
> +Tony
>
> On Tue, 2024-11-05 at 01:23 -0500, Xiaoyao Li wrote:
>> +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
>> +{
>> + X86CPU *x86cpu = X86_CPU(cpu);
>> + CPUX86State *env = &x86cpu->env;
>> + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
>> + int r = 0;
>> +
>> + QEMU_LOCK_GUARD(&tdx_guest->lock);
>> + if (tdx_guest->initialized) {
>> + return r;
>> + }
>> +
>> + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
>> + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
>> +
>> + r = setup_td_xfam(x86cpu, errp);
>> + if (r) {
>> + return r;
>> + }
>> +
>> + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
>> + tdx_filter_cpuid(&init_vm->cpuid);
>> +
>> + init_vm->attributes = tdx_guest->attributes;
>> + init_vm->xfam = tdx_guest->xfam;
>> +
>> + do {
>> + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
>> + } while (r == -EAGAIN);
>
> KVM_TDX_INIT_VM can also return EBUSY. This should check for it, or KVM should
> standardize on one for both conditions. In KVM, both cases handle
> TDX_RND_NO_ENTROPY, but one tries to save some of the initialization for the
> next attempt. I don't know why userspace would need to differentiate between the
> two cases though, which makes me think we should just change the KVM side.
I remember I tested retrying on the two cases and no surprise showed.
I agree to change KVM side to return -EAGAIN for the two cases.
>> + if (r < 0) {
>> + error_setg_errno(errp, -r, "KVM_TDX_INIT_VM failed");
>> + return r;
>> + }
>> +
>> + tdx_guest->initialized = true;
>> +
>> + return 0;
>> +}
>
On Wed, Nov 06, 2024 at 10:01:04AM +0800, Xiaoyao Li wrote:
> On 11/6/2024 4:51 AM, Edgecombe, Rick P wrote:
> > +Tony
> >
> > On Tue, 2024-11-05 at 01:23 -0500, Xiaoyao Li wrote:
> > > +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
> > > +{
> > > + X86CPU *x86cpu = X86_CPU(cpu);
> > > + CPUX86State *env = &x86cpu->env;
> > > + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
> > > + int r = 0;
> > > +
> > > + QEMU_LOCK_GUARD(&tdx_guest->lock);
> > > + if (tdx_guest->initialized) {
> > > + return r;
> > > + }
> > > +
> > > + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
> > > + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
> > > +
> > > + r = setup_td_xfam(x86cpu, errp);
> > > + if (r) {
> > > + return r;
> > > + }
> > > +
> > > + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
> > > + tdx_filter_cpuid(&init_vm->cpuid);
> > > +
> > > + init_vm->attributes = tdx_guest->attributes;
> > > + init_vm->xfam = tdx_guest->xfam;
> > > +
> > > + do {
> > > + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
> > > + } while (r == -EAGAIN);
> >
> > KVM_TDX_INIT_VM can also return EBUSY. This should check for it, or KVM should
> > standardize on one for both conditions. In KVM, both cases handle
> > TDX_RND_NO_ENTROPY, but one tries to save some of the initialization for the
> > next attempt. I don't know why userspace would need to differentiate between the
> > two cases though, which makes me think we should just change the KVM side.
>
> I remember I tested retrying on the two cases and no surprise showed.
>
> I agree to change KVM side to return -EAGAIN for the two cases.
OK yeah let's patch KVM for it.
Regards,
Tony
On Wed, Nov 06, 2024 at 07:13:56AM +0200, Tony Lindgren wrote:
> On Wed, Nov 06, 2024 at 10:01:04AM +0800, Xiaoyao Li wrote:
> > On 11/6/2024 4:51 AM, Edgecombe, Rick P wrote:
> > > +Tony
> > >
> > > On Tue, 2024-11-05 at 01:23 -0500, Xiaoyao Li wrote:
> > > > +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
> > > > +{
> > > > + X86CPU *x86cpu = X86_CPU(cpu);
> > > > + CPUX86State *env = &x86cpu->env;
> > > > + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
> > > > + int r = 0;
> > > > +
> > > > + QEMU_LOCK_GUARD(&tdx_guest->lock);
> > > > + if (tdx_guest->initialized) {
> > > > + return r;
> > > > + }
> > > > +
> > > > + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
> > > > + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
> > > > +
> > > > + r = setup_td_xfam(x86cpu, errp);
> > > > + if (r) {
> > > > + return r;
> > > > + }
> > > > +
> > > > + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
> > > > + tdx_filter_cpuid(&init_vm->cpuid);
> > > > +
> > > > + init_vm->attributes = tdx_guest->attributes;
> > > > + init_vm->xfam = tdx_guest->xfam;
> > > > +
> > > > + do {
> > > > + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
> > > > + } while (r == -EAGAIN);
> > >
> > > KVM_TDX_INIT_VM can also return EBUSY. This should check for it, or KVM should
> > > standardize on one for both conditions. In KVM, both cases handle
> > > TDX_RND_NO_ENTROPY, but one tries to save some of the initialization for the
> > > next attempt. I don't know why userspace would need to differentiate between the
> > > two cases though, which makes me think we should just change the KVM side.
> >
> > I remember I tested retrying on the two cases and no surprise showed.
> >
> > I agree to change KVM side to return -EAGAIN for the two cases.
>
> OK yeah let's patch KVM for it.
Will the patch to KVM converge such that it is ok for qemu to loop forever?
Ira
>
> Regards,
>
> Tony
On Thu, Dec 12, 2024 at 11:24:03AM -0600, Ira Weiny wrote:
> On Wed, Nov 06, 2024 at 07:13:56AM +0200, Tony Lindgren wrote:
> > On Wed, Nov 06, 2024 at 10:01:04AM +0800, Xiaoyao Li wrote:
> > > On 11/6/2024 4:51 AM, Edgecombe, Rick P wrote:
> > > > +Tony
> > > >
> > > > On Tue, 2024-11-05 at 01:23 -0500, Xiaoyao Li wrote:
> > > > > +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
> > > > > +{
> > > > > + X86CPU *x86cpu = X86_CPU(cpu);
> > > > > + CPUX86State *env = &x86cpu->env;
> > > > > + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
> > > > > + int r = 0;
> > > > > +
> > > > > + QEMU_LOCK_GUARD(&tdx_guest->lock);
> > > > > + if (tdx_guest->initialized) {
> > > > > + return r;
> > > > > + }
> > > > > +
> > > > > + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
> > > > > + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
> > > > > +
> > > > > + r = setup_td_xfam(x86cpu, errp);
> > > > > + if (r) {
> > > > > + return r;
> > > > > + }
> > > > > +
> > > > > + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
> > > > > + tdx_filter_cpuid(&init_vm->cpuid);
> > > > > +
> > > > > + init_vm->attributes = tdx_guest->attributes;
> > > > > + init_vm->xfam = tdx_guest->xfam;
> > > > > +
> > > > > + do {
> > > > > + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
> > > > > + } while (r == -EAGAIN);
> > > >
> > > > KVM_TDX_INIT_VM can also return EBUSY. This should check for it, or KVM should
> > > > standardize on one for both conditions. In KVM, both cases handle
> > > > TDX_RND_NO_ENTROPY, but one tries to save some of the initialization for the
> > > > next attempt. I don't know why userspace would need to differentiate between the
> > > > two cases though, which makes me think we should just change the KVM side.
> > >
> > > I remember I tested retrying on the two cases and no surprise showed.
> > >
> > > I agree to change KVM side to return -EAGAIN for the two cases.
> >
> > OK yeah let's patch KVM for it.
>
> Will the patch to KVM converge such that it is ok for qemu to loop forever?
Hmm I don't think we should loop forever anywhere, the retries needed should
be only a few. Or what do you have in mind?
Regards,
Tony
On 12/17/2024 9:10 PM, Tony Lindgren wrote:
> On Thu, Dec 12, 2024 at 11:24:03AM -0600, Ira Weiny wrote:
>> On Wed, Nov 06, 2024 at 07:13:56AM +0200, Tony Lindgren wrote:
>>> On Wed, Nov 06, 2024 at 10:01:04AM +0800, Xiaoyao Li wrote:
>>>> On 11/6/2024 4:51 AM, Edgecombe, Rick P wrote:
>>>>> +Tony
>>>>>
>>>>> On Tue, 2024-11-05 at 01:23 -0500, Xiaoyao Li wrote:
>>>>>> +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
>>>>>> +{
>>>>>> + X86CPU *x86cpu = X86_CPU(cpu);
>>>>>> + CPUX86State *env = &x86cpu->env;
>>>>>> + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
>>>>>> + int r = 0;
>>>>>> +
>>>>>> + QEMU_LOCK_GUARD(&tdx_guest->lock);
>>>>>> + if (tdx_guest->initialized) {
>>>>>> + return r;
>>>>>> + }
>>>>>> +
>>>>>> + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
>>>>>> + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
>>>>>> +
>>>>>> + r = setup_td_xfam(x86cpu, errp);
>>>>>> + if (r) {
>>>>>> + return r;
>>>>>> + }
>>>>>> +
>>>>>> + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
>>>>>> + tdx_filter_cpuid(&init_vm->cpuid);
>>>>>> +
>>>>>> + init_vm->attributes = tdx_guest->attributes;
>>>>>> + init_vm->xfam = tdx_guest->xfam;
>>>>>> +
>>>>>> + do {
>>>>>> + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
>>>>>> + } while (r == -EAGAIN);
>>>>>
>>>>> KVM_TDX_INIT_VM can also return EBUSY. This should check for it, or KVM should
>>>>> standardize on one for both conditions. In KVM, both cases handle
>>>>> TDX_RND_NO_ENTROPY, but one tries to save some of the initialization for the
>>>>> next attempt. I don't know why userspace would need to differentiate between the
>>>>> two cases though, which makes me think we should just change the KVM side.
>>>>
>>>> I remember I tested retrying on the two cases and no surprise showed.
>>>>
>>>> I agree to change KVM side to return -EAGAIN for the two cases.
>>>
>>> OK yeah let's patch KVM for it.
>>
>> Will the patch to KVM converge such that it is ok for qemu to loop forever?
>
> Hmm I don't think we should loop forever anywhere, the retries needed should
> be only a few. Or what do you have in mind?
"A few" seems not accurate. It depends on how heavy the RDRAND/RDSEED
traffic from others are. IIRC, it gets > 10 0000 -EAGAIN before success
when all the LPs in the system are doing RDRAND/RDSEED.
Maybe a timeout? E.g., QEMU exits when it cannot move forward for a
certain period.
However, I'm not sure what value is reasonable for the timeout.
> Regards,
>
> Tony
On Tue, Jan 14, 2025 at 08:39:37PM +0800, Xiaoyao Li wrote: > On 12/17/2024 9:10 PM, Tony Lindgren wrote: > > Hmm I don't think we should loop forever anywhere, the retries needed should > > be only a few. Or what do you have in mind? > > "A few" seems not accurate. It depends on how heavy the RDRAND/RDSEED > traffic from others are. IIRC, it gets > 10 0000 -EAGAIN before success when > all the LPs in the system are doing RDRAND/RDSEED. Oh OK :) > Maybe a timeout? E.g., QEMU exits when it cannot move forward for a certain > period. > > However, I'm not sure what value is reasonable for the timeout. Maybe some reasonable timeout could be multiplied by the number of CPUs or LPs available on the system? Regards, Tony
On Tue, Nov 05, 2024 at 01:23:17AM -0500, Xiaoyao Li wrote:
> Invoke KVM_TDX_INIT in kvm_arch_pre_create_vcpu() that KVM_TDX_INIT
> configures global TD configurations, e.g. the canonical CPUID config,
> and must be executed prior to creating vCPUs.
>
> Use kvm_x86_arch_cpuid() to setup the CPUID settings for TDX VM.
>
> Note, this doesn't address the fact that QEMU may change the CPUID
> configuration when creating vCPUs, i.e. punts on refactoring QEMU to
> provide a stable CPUID config prior to kvm_arch_init().
>
> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> Acked-by: Gerd Hoffmann <kraxel@redhat.com>
> Acked-by: Markus Armbruster <armbru@redhat.com>
> ---
> Changes in v6:
> - setup xfam explicitly to fit with new uapi;
> - use tdx_caps->cpuid to filter the input of cpuids because now KVM only
> allows the leafs that reported via KVM_TDX_GET_CAPABILITIES;
>
> Changes in v4:
> - mark init_vm with g_autofree() and use QEMU_LOCK_GUARD() to eliminate
> the goto labels; (Daniel)
> Changes in v3:
> - Pass @errp in tdx_pre_create_vcpu() and pass error info to it. (Daniel)
> ---
> accel/kvm/kvm-all.c | 8 ++++
> target/i386/kvm/kvm.c | 15 +++++--
> target/i386/kvm/kvm_i386.h | 3 ++
> target/i386/kvm/meson.build | 2 +-
> target/i386/kvm/tdx-stub.c | 8 ++++
> target/i386/kvm/tdx.c | 87 +++++++++++++++++++++++++++++++++++++
> target/i386/kvm/tdx.h | 6 +++
> 7 files changed, 125 insertions(+), 4 deletions(-)
> create mode 100644 target/i386/kvm/tdx-stub.c
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 1732fa1adecd..4a1c9950894c 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -536,8 +536,15 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>
> trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>
> + /*
> + * tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
> + * kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
> + * dereference.
> + */
> + cpu->kvm_state = s;
> ret = kvm_arch_pre_create_vcpu(cpu, errp);
> if (ret < 0) {
> + cpu->kvm_state = NULL;
> goto err;
> }
>
> @@ -546,6 +553,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
> error_setg_errno(errp, -ret,
> "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
> kvm_arch_vcpu_id(cpu));
> + cpu->kvm_state = NULL;
> goto err;
> }
>
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index afbf67a7fdaa..db676c1336ab 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -38,6 +38,7 @@
> #include "kvm_i386.h"
> #include "../confidential-guest.h"
> #include "sev.h"
> +#include "tdx.h"
> #include "xen-emu.h"
> #include "hyperv.h"
> #include "hyperv-proto.h"
> @@ -1824,9 +1825,8 @@ static void kvm_init_nested_state(CPUX86State *env)
> }
> }
>
> -static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
> - struct kvm_cpuid_entry2 *entries,
> - uint32_t cpuid_i)
> +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
> + uint32_t cpuid_i)
> {
> uint32_t limit, i, j;
> uint32_t unused;
> @@ -2358,6 +2358,15 @@ int kvm_arch_init_vcpu(CPUState *cs)
> return r;
> }
>
> +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
> +{
> + if (is_tdx_vm()) {
> + return tdx_pre_create_vcpu(cpu, errp);
> + }
> +
> + return 0;
> +}
> +
> int kvm_arch_destroy_vcpu(CPUState *cs)
> {
> X86CPU *cpu = X86_CPU(cs);
> diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
> index efb0883bd968..b1baf9e7f910 100644
> --- a/target/i386/kvm/kvm_i386.h
> +++ b/target/i386/kvm/kvm_i386.h
> @@ -24,6 +24,9 @@
> #define kvm_ioapic_in_kernel() \
> (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
>
> +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
> + uint32_t cpuid_i);
> +
> #else
>
> #define kvm_pit_in_kernel() 0
> diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
> index 466bccb9cb17..3f44cdedb758 100644
> --- a/target/i386/kvm/meson.build
> +++ b/target/i386/kvm/meson.build
> @@ -8,7 +8,7 @@ i386_kvm_ss.add(files(
>
> i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
>
> -i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'))
> +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c'))
>
> i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
>
> diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
> new file mode 100644
> index 000000000000..b614b46d3f4a
> --- /dev/null
> +++ b/target/i386/kvm/tdx-stub.c
> @@ -0,0 +1,8 @@
> +#include "qemu/osdep.h"
> +
> +#include "tdx.h"
> +
> +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
> +{
> + return -EINVAL;
> +}
> diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
> index ff3ef9bd8657..1b7894e43c6f 100644
> --- a/target/i386/kvm/tdx.c
> +++ b/target/i386/kvm/tdx.c
> @@ -137,6 +137,91 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
> return KVM_X86_TDX_VM;
> }
>
> +static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
> +{
> + CPUX86State *env = &x86cpu->env;
> + uint64_t xfam;
> +
> + xfam = env->features[FEAT_XSAVE_XCR0_LO] |
> + env->features[FEAT_XSAVE_XCR0_HI] |
> + env->features[FEAT_XSAVE_XSS_LO] |
> + env->features[FEAT_XSAVE_XSS_HI];
> +
> + if (xfam & ~tdx_caps->supported_xfam) {
> + error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
> + xfam, tdx_caps->supported_xfam);
> + return -1;
> + }
> +
> + tdx_guest->xfam = xfam;
> + return 0;
> +}
> +
> +static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
> +{
> + int i, dest_cnt = 0;
> + struct kvm_cpuid_entry2 *src, *dest, *conf;
> +
> + for (i = 0; i < cpuids->nent; i++) {
> + src = cpuids->entries + i;
> + conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
> + if (!conf) {
> + continue;
> + }
> + dest = cpuids->entries + dest_cnt;
> +
> + dest->function = src->function;
> + dest->index = src->index;
> + dest->flags = src->flags;
> + dest->eax = src->eax & conf->eax;
> + dest->ebx = src->ebx & conf->ebx;
> + dest->ecx = src->ecx & conf->ecx;
> + dest->edx = src->edx & conf->edx;
> +
> + dest_cnt++;
> + }
> + cpuids->nent = dest_cnt++;
> +}
> +
> +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
> +{
> + X86CPU *x86cpu = X86_CPU(cpu);
> + CPUX86State *env = &x86cpu->env;
> + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
> + int r = 0;
> +
> + QEMU_LOCK_GUARD(&tdx_guest->lock);
> + if (tdx_guest->initialized) {
> + return r;
> + }
> +
> + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
> + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
> +
> + r = setup_td_xfam(x86cpu, errp);
> + if (r) {
> + return r;
> + }
> +
> + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
> + tdx_filter_cpuid(&init_vm->cpuid);
> +
> + init_vm->attributes = tdx_guest->attributes;
> + init_vm->xfam = tdx_guest->xfam;
> +
> + do {
> + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
> + } while (r == -EAGAIN);
Other calls to tdx_vm_ioctl don't loop on EAGAIN. Is the need to
do this retry specific to only KVM_TDX_INIT_VM, or should we push
the EAGAIN retry logic inside tdx_vm_ioctl_helper so all callers
benefit ?
> + if (r < 0) {
> + error_setg_errno(errp, -r, "KVM_TDX_INIT_VM failed");
> + return r;
> + }
> +
> + tdx_guest->initialized = true;
> +
> + return 0;
> +}
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 11/5/2024 6:34 PM, Daniel P. Berrangé wrote:
> On Tue, Nov 05, 2024 at 01:23:17AM -0500, Xiaoyao Li wrote:
>> Invoke KVM_TDX_INIT in kvm_arch_pre_create_vcpu() that KVM_TDX_INIT
>> configures global TD configurations, e.g. the canonical CPUID config,
>> and must be executed prior to creating vCPUs.
>>
>> Use kvm_x86_arch_cpuid() to setup the CPUID settings for TDX VM.
>>
>> Note, this doesn't address the fact that QEMU may change the CPUID
>> configuration when creating vCPUs, i.e. punts on refactoring QEMU to
>> provide a stable CPUID config prior to kvm_arch_init().
>>
>> Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
>> Acked-by: Gerd Hoffmann <kraxel@redhat.com>
>> Acked-by: Markus Armbruster <armbru@redhat.com>
>> ---
>> Changes in v6:
>> - setup xfam explicitly to fit with new uapi;
>> - use tdx_caps->cpuid to filter the input of cpuids because now KVM only
>> allows the leafs that reported via KVM_TDX_GET_CAPABILITIES;
>>
>> Changes in v4:
>> - mark init_vm with g_autofree() and use QEMU_LOCK_GUARD() to eliminate
>> the goto labels; (Daniel)
>> Changes in v3:
>> - Pass @errp in tdx_pre_create_vcpu() and pass error info to it. (Daniel)
>> ---
>> accel/kvm/kvm-all.c | 8 ++++
>> target/i386/kvm/kvm.c | 15 +++++--
>> target/i386/kvm/kvm_i386.h | 3 ++
>> target/i386/kvm/meson.build | 2 +-
>> target/i386/kvm/tdx-stub.c | 8 ++++
>> target/i386/kvm/tdx.c | 87 +++++++++++++++++++++++++++++++++++++
>> target/i386/kvm/tdx.h | 6 +++
>> 7 files changed, 125 insertions(+), 4 deletions(-)
>> create mode 100644 target/i386/kvm/tdx-stub.c
>>
>> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
>> index 1732fa1adecd..4a1c9950894c 100644
>> --- a/accel/kvm/kvm-all.c
>> +++ b/accel/kvm/kvm-all.c
>> @@ -536,8 +536,15 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>>
>> trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
>>
>> + /*
>> + * tdx_pre_create_vcpu() may call cpu_x86_cpuid(). It in turn may call
>> + * kvm_vm_ioctl(). Set cpu->kvm_state in advance to avoid NULL pointer
>> + * dereference.
>> + */
>> + cpu->kvm_state = s;
>> ret = kvm_arch_pre_create_vcpu(cpu, errp);
>> if (ret < 0) {
>> + cpu->kvm_state = NULL;
>> goto err;
>> }
>>
>> @@ -546,6 +553,7 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
>> error_setg_errno(errp, -ret,
>> "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
>> kvm_arch_vcpu_id(cpu));
>> + cpu->kvm_state = NULL;
>> goto err;
>> }
>>
>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>> index afbf67a7fdaa..db676c1336ab 100644
>> --- a/target/i386/kvm/kvm.c
>> +++ b/target/i386/kvm/kvm.c
>> @@ -38,6 +38,7 @@
>> #include "kvm_i386.h"
>> #include "../confidential-guest.h"
>> #include "sev.h"
>> +#include "tdx.h"
>> #include "xen-emu.h"
>> #include "hyperv.h"
>> #include "hyperv-proto.h"
>> @@ -1824,9 +1825,8 @@ static void kvm_init_nested_state(CPUX86State *env)
>> }
>> }
>>
>> -static uint32_t kvm_x86_build_cpuid(CPUX86State *env,
>> - struct kvm_cpuid_entry2 *entries,
>> - uint32_t cpuid_i)
>> +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
>> + uint32_t cpuid_i)
>> {
>> uint32_t limit, i, j;
>> uint32_t unused;
>> @@ -2358,6 +2358,15 @@ int kvm_arch_init_vcpu(CPUState *cs)
>> return r;
>> }
>>
>> +int kvm_arch_pre_create_vcpu(CPUState *cpu, Error **errp)
>> +{
>> + if (is_tdx_vm()) {
>> + return tdx_pre_create_vcpu(cpu, errp);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> int kvm_arch_destroy_vcpu(CPUState *cs)
>> {
>> X86CPU *cpu = X86_CPU(cs);
>> diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
>> index efb0883bd968..b1baf9e7f910 100644
>> --- a/target/i386/kvm/kvm_i386.h
>> +++ b/target/i386/kvm/kvm_i386.h
>> @@ -24,6 +24,9 @@
>> #define kvm_ioapic_in_kernel() \
>> (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split())
>>
>> +uint32_t kvm_x86_build_cpuid(CPUX86State *env, struct kvm_cpuid_entry2 *entries,
>> + uint32_t cpuid_i);
>> +
>> #else
>>
>> #define kvm_pit_in_kernel() 0
>> diff --git a/target/i386/kvm/meson.build b/target/i386/kvm/meson.build
>> index 466bccb9cb17..3f44cdedb758 100644
>> --- a/target/i386/kvm/meson.build
>> +++ b/target/i386/kvm/meson.build
>> @@ -8,7 +8,7 @@ i386_kvm_ss.add(files(
>>
>> i386_kvm_ss.add(when: 'CONFIG_XEN_EMU', if_true: files('xen-emu.c'))
>>
>> -i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'))
>> +i386_kvm_ss.add(when: 'CONFIG_TDX', if_true: files('tdx.c'), if_false: files('tdx-stub.c'))
>>
>> i386_system_ss.add(when: 'CONFIG_HYPERV', if_true: files('hyperv.c'), if_false: files('hyperv-stub.c'))
>>
>> diff --git a/target/i386/kvm/tdx-stub.c b/target/i386/kvm/tdx-stub.c
>> new file mode 100644
>> index 000000000000..b614b46d3f4a
>> --- /dev/null
>> +++ b/target/i386/kvm/tdx-stub.c
>> @@ -0,0 +1,8 @@
>> +#include "qemu/osdep.h"
>> +
>> +#include "tdx.h"
>> +
>> +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
>> +{
>> + return -EINVAL;
>> +}
>> diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
>> index ff3ef9bd8657..1b7894e43c6f 100644
>> --- a/target/i386/kvm/tdx.c
>> +++ b/target/i386/kvm/tdx.c
>> @@ -137,6 +137,91 @@ static int tdx_kvm_type(X86ConfidentialGuest *cg)
>> return KVM_X86_TDX_VM;
>> }
>>
>> +static int setup_td_xfam(X86CPU *x86cpu, Error **errp)
>> +{
>> + CPUX86State *env = &x86cpu->env;
>> + uint64_t xfam;
>> +
>> + xfam = env->features[FEAT_XSAVE_XCR0_LO] |
>> + env->features[FEAT_XSAVE_XCR0_HI] |
>> + env->features[FEAT_XSAVE_XSS_LO] |
>> + env->features[FEAT_XSAVE_XSS_HI];
>> +
>> + if (xfam & ~tdx_caps->supported_xfam) {
>> + error_setg(errp, "Invalid XFAM 0x%lx for TDX VM (supported: 0x%llx))",
>> + xfam, tdx_caps->supported_xfam);
>> + return -1;
>> + }
>> +
>> + tdx_guest->xfam = xfam;
>> + return 0;
>> +}
>> +
>> +static void tdx_filter_cpuid(struct kvm_cpuid2 *cpuids)
>> +{
>> + int i, dest_cnt = 0;
>> + struct kvm_cpuid_entry2 *src, *dest, *conf;
>> +
>> + for (i = 0; i < cpuids->nent; i++) {
>> + src = cpuids->entries + i;
>> + conf = cpuid_find_entry(&tdx_caps->cpuid, src->function, src->index);
>> + if (!conf) {
>> + continue;
>> + }
>> + dest = cpuids->entries + dest_cnt;
>> +
>> + dest->function = src->function;
>> + dest->index = src->index;
>> + dest->flags = src->flags;
>> + dest->eax = src->eax & conf->eax;
>> + dest->ebx = src->ebx & conf->ebx;
>> + dest->ecx = src->ecx & conf->ecx;
>> + dest->edx = src->edx & conf->edx;
>> +
>> + dest_cnt++;
>> + }
>> + cpuids->nent = dest_cnt++;
>> +}
>> +
>> +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
>> +{
>> + X86CPU *x86cpu = X86_CPU(cpu);
>> + CPUX86State *env = &x86cpu->env;
>> + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
>> + int r = 0;
>> +
>> + QEMU_LOCK_GUARD(&tdx_guest->lock);
>> + if (tdx_guest->initialized) {
>> + return r;
>> + }
>> +
>> + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
>> + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
>> +
>> + r = setup_td_xfam(x86cpu, errp);
>> + if (r) {
>> + return r;
>> + }
>> +
>> + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
>> + tdx_filter_cpuid(&init_vm->cpuid);
>> +
>> + init_vm->attributes = tdx_guest->attributes;
>> + init_vm->xfam = tdx_guest->xfam;
>> +
>> + do {
>> + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
>> + } while (r == -EAGAIN);
>
> Other calls to tdx_vm_ioctl don't loop on EAGAIN. Is the need to
> do this retry specific to only KVM_TDX_INIT_VM, or should we push
> the EAGAIN retry logic inside tdx_vm_ioctl_helper so all callers
> benefit ?
So far, only KVM_TDX_INIT_VM can get -EAGAIN due to KVM side
TDH_MNG_CREATE gets TDX_RND_NO_ENTROPY because Random number generation
(e.g., RDRAND or RDSEED) failed and in this case it should retry.
I think adding a commment to explain why it can get -EAGAIN and needs to
retry should suffice?
>> + if (r < 0) {
>> + error_setg_errno(errp, -r, "KVM_TDX_INIT_VM failed");
>> + return r;
>> + }
>> +
>> + tdx_guest->initialized = true;
>> +
>> + return 0;
>> +}
>
> With regards,
> Daniel
On Tue, Nov 05, 2024 at 07:51:53PM +0800, Xiaoyao Li wrote:
> On 11/5/2024 6:34 PM, Daniel P. Berrangé wrote:
> > On Tue, Nov 05, 2024 at 01:23:17AM -0500, Xiaoyao Li wrote:
> > > Invoke KVM_TDX_INIT in kvm_arch_pre_create_vcpu() that KVM_TDX_INIT
> > > configures global TD configurations, e.g. the canonical CPUID config,
> > > and must be executed prior to creating vCPUs.
> > >
> > > Use kvm_x86_arch_cpuid() to setup the CPUID settings for TDX VM.
> > >
> > > Note, this doesn't address the fact that QEMU may change the CPUID
> > > configuration when creating vCPUs, i.e. punts on refactoring QEMU to
> > > provide a stable CPUID config prior to kvm_arch_init().
> > >
> > > Signed-off-by: Xiaoyao Li <xiaoyao.li@intel.com>
> > > Acked-by: Gerd Hoffmann <kraxel@redhat.com>
> > > Acked-by: Markus Armbruster <armbru@redhat.com>
> > > ---
> > > Changes in v6:
> > > - setup xfam explicitly to fit with new uapi;
> > > - use tdx_caps->cpuid to filter the input of cpuids because now KVM only
> > > allows the leafs that reported via KVM_TDX_GET_CAPABILITIES;
> > >
> > > Changes in v4:
> > > - mark init_vm with g_autofree() and use QEMU_LOCK_GUARD() to eliminate
> > > the goto labels; (Daniel)
> > > Changes in v3:
> > > - Pass @errp in tdx_pre_create_vcpu() and pass error info to it. (Daniel)
> > > ---
> > > accel/kvm/kvm-all.c | 8 ++++
> > > target/i386/kvm/kvm.c | 15 +++++--
> > > target/i386/kvm/kvm_i386.h | 3 ++
> > > target/i386/kvm/meson.build | 2 +-
> > > target/i386/kvm/tdx-stub.c | 8 ++++
> > > target/i386/kvm/tdx.c | 87 +++++++++++++++++++++++++++++++++++++
> > > target/i386/kvm/tdx.h | 6 +++
> > > 7 files changed, 125 insertions(+), 4 deletions(-)
> > > create mode 100644 target/i386/kvm/tdx-stub.c
> > > +int tdx_pre_create_vcpu(CPUState *cpu, Error **errp)
> > > +{
> > > + X86CPU *x86cpu = X86_CPU(cpu);
> > > + CPUX86State *env = &x86cpu->env;
> > > + g_autofree struct kvm_tdx_init_vm *init_vm = NULL;
> > > + int r = 0;
> > > +
> > > + QEMU_LOCK_GUARD(&tdx_guest->lock);
> > > + if (tdx_guest->initialized) {
> > > + return r;
> > > + }
> > > +
> > > + init_vm = g_malloc0(sizeof(struct kvm_tdx_init_vm) +
> > > + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES);
> > > +
> > > + r = setup_td_xfam(x86cpu, errp);
> > > + if (r) {
> > > + return r;
> > > + }
> > > +
> > > + init_vm->cpuid.nent = kvm_x86_build_cpuid(env, init_vm->cpuid.entries, 0);
> > > + tdx_filter_cpuid(&init_vm->cpuid);
> > > +
> > > + init_vm->attributes = tdx_guest->attributes;
> > > + init_vm->xfam = tdx_guest->xfam;
> > > +
> > > + do {
> > > + r = tdx_vm_ioctl(KVM_TDX_INIT_VM, 0, init_vm);
> > > + } while (r == -EAGAIN);
> >
> > Other calls to tdx_vm_ioctl don't loop on EAGAIN. Is the need to
> > do this retry specific to only KVM_TDX_INIT_VM, or should we push
> > the EAGAIN retry logic inside tdx_vm_ioctl_helper so all callers
> > benefit ?
>
> So far, only KVM_TDX_INIT_VM can get -EAGAIN due to KVM side TDH_MNG_CREATE
> gets TDX_RND_NO_ENTROPY because Random number generation (e.g., RDRAND or
> RDSEED) failed and in this case it should retry.
Ok, no problem.
> I think adding a commment to explain why it can get -EAGAIN and needs to
> retry should suffice?
Sure, a comment is useful.
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
© 2016 - 2026 Red Hat, Inc.