From: Joao Martins <joao.m.martins@oracle.com>
Handle the hypercall to set a per vcpu info, and also wire up the default
vcpu_info in the shared_info page for the first 32 vCPUs.
To avoid deadlock within KVM a vCPU thread must set its *own* vcpu_info
rather than it being set from the context in which the hypercall is
invoked.
Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for migration,
and restore it in kvm_arch_put_registers() appropriately.
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
target/i386/cpu.h | 2 ++
target/i386/kvm/kvm.c | 19 +++++++++++
target/i386/machine.c | 21 ++++++++++++
target/i386/trace-events | 1 +
target/i386/xen.c | 74 +++++++++++++++++++++++++++++++++++++---
target/i386/xen.h | 1 +
6 files changed, 113 insertions(+), 5 deletions(-)
diff --git a/target/i386/cpu.h b/target/i386/cpu.h
index c6c57baed5..109b2e5669 100644
--- a/target/i386/cpu.h
+++ b/target/i386/cpu.h
@@ -1788,6 +1788,8 @@ typedef struct CPUArchState {
#endif
#if defined(CONFIG_KVM)
struct kvm_nested_state *nested_state;
+ uint64_t xen_vcpu_info_gpa;
+ uint64_t xen_vcpu_info_default_gpa;
#endif
#if defined(CONFIG_HVF)
HVFX86LazyFlags hvf_lflags;
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index ebde6bc204..fa45e2f99a 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
has_msr_hv_hypercall = true;
}
+ env->xen_vcpu_info_gpa = UINT64_MAX;
+ env->xen_vcpu_info_default_gpa = UINT64_MAX;
+
xen_version = kvm_arch_xen_version(MACHINE(qdev_get_machine()));
if (xen_version) {
#ifdef CONFIG_XEN_EMU
@@ -4728,6 +4731,22 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
kvm_arch_set_tsc_khz(cpu);
}
+#ifdef CONFIG_XEN_EMU
+ if (level == KVM_PUT_FULL_STATE) {
+ uint64_t gpa = x86_cpu->env.xen_vcpu_info_gpa;
+ if (gpa == UINT64_MAX) {
+ gpa = x86_cpu->env.xen_vcpu_info_default_gpa;
+ }
+
+ if (gpa != UINT64_MAX) {
+ ret = kvm_xen_set_vcpu_attr(cpu, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ }
+#endif
+
ret = kvm_getput_regs(x86_cpu, 1);
if (ret < 0) {
return ret;
diff --git a/target/i386/machine.c b/target/i386/machine.c
index 310b125235..104cd6047c 100644
--- a/target/i386/machine.c
+++ b/target/i386/machine.c
@@ -1257,6 +1257,26 @@ static const VMStateDescription vmstate_nested_state = {
}
};
+static bool xen_vcpu_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+
+ return (env->xen_vcpu_info_gpa != UINT64_MAX ||
+ env->xen_vcpu_info_default_gpa != UINT64_MAX);
+}
+
+static const VMStateDescription vmstate_xen_vcpu = {
+ .name = "cpu/xen_vcpu",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .needed = xen_vcpu_needed,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(env.xen_vcpu_info_gpa, X86CPU),
+ VMSTATE_UINT64(env.xen_vcpu_info_default_gpa, X86CPU),
+ VMSTATE_END_OF_LIST()
+ }
+};
#endif
static bool mcg_ext_ctl_needed(void *opaque)
@@ -1716,6 +1736,7 @@ const VMStateDescription vmstate_x86_cpu = {
#endif
#ifdef CONFIG_KVM
&vmstate_nested_state,
+ &vmstate_xen_vcpu,
#endif
&vmstate_msr_tsx_ctrl,
&vmstate_msr_intel_sgx,
diff --git a/target/i386/trace-events b/target/i386/trace-events
index fb999d0052..7118640697 100644
--- a/target/i386/trace-events
+++ b/target/i386/trace-events
@@ -15,3 +15,4 @@ kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data
# target/i386/xen.c
kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64
kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64
+kvm_xen_set_vcpu_attr(int cpu, int type, uint64_t gpa) "vcpu attr cpu %d type %d gpa 0x%" PRIx64
diff --git a/target/i386/xen.c b/target/i386/xen.c
index 9d1daadee1..cd816bb711 100644
--- a/target/i386/xen.c
+++ b/target/i386/xen.c
@@ -129,10 +129,47 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
return true;
}
+int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
+{
+ struct kvm_xen_vcpu_attr xhsi;
+
+ xhsi.type = type;
+ xhsi.u.gpa = gpa;
+
+ trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
+
+ return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
+}
+
+static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+
+ env->xen_vcpu_info_default_gpa = data.host_ulong;
+
+ /* Changing the default does nothing if a vcpu_info was explicitly set. */
+ if (env->xen_vcpu_info_gpa == UINT64_MAX) {
+ kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+ env->xen_vcpu_info_default_gpa);
+ }
+}
+
+static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
+{
+ X86CPU *cpu = X86_CPU(cs);
+ CPUX86State *env = &cpu->env;
+
+ env->xen_vcpu_info_gpa = data.host_ulong;
+
+ kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+ env->xen_vcpu_info_gpa);
+}
+
static int xen_set_shared_info(CPUState *cs, uint64_t gfn)
{
uint64_t gpa = gfn << TARGET_PAGE_BITS;
- int err;
+ int i, err;
/* The xen_overlay device tells KVM about it too, since it had to
* do that on migration load anyway (unless we're going to jump
@@ -144,6 +181,14 @@ static int xen_set_shared_info(CPUState *cs, uint64_t gfn)
trace_kvm_xen_set_shared_info(gfn);
+ for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
+ CPUState *cpu = qemu_get_cpu(i);
+ if (cpu) {
+ async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
+ }
+ gpa += sizeof(vcpu_info_t);
+ }
+
return err;
}
@@ -195,19 +240,38 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit,
}
}
+static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
+ uint64_t arg)
+{
+ struct vcpu_register_vcpu_info rvi;
+ uint64_t gpa;
+
+ if (!target)
+ return -ENOENT;
+
+ if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
+ return -EFAULT;
+ }
+
+ gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
+ async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
+ return 0;
+}
+
static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
int cmd, int vcpu_id, uint64_t arg)
{
+ CPUState *dest = qemu_get_cpu(vcpu_id);
+ CPUState *cs = CPU(cpu);
int err;
switch (cmd) {
case VCPUOP_register_vcpu_info:
- /* no vcpu info placement for now */
- err = -ENOSYS;
- break;
+ err = vcpuop_register_vcpu_info(cs, dest, arg);
+ break;
default:
- return false;
+ return false;
}
exit->u.hcall.result = err;
diff --git a/target/i386/xen.h b/target/i386/xen.h
index 9134d78685..53573e07f8 100644
--- a/target/i386/xen.h
+++ b/target/i386/xen.h
@@ -24,5 +24,6 @@
int kvm_xen_init(KVMState *s, uint32_t xen_version);
int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit);
+int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa);
#endif /* QEMU_I386_XEN_H */
--
2.35.3
On 09/12/2022 09:56, David Woodhouse wrote:
> From: Joao Martins <joao.m.martins@oracle.com>
>
> Handle the hypercall to set a per vcpu info, and also wire up the default
> vcpu_info in the shared_info page for the first 32 vCPUs.
>
> To avoid deadlock within KVM a vCPU thread must set its *own* vcpu_info
> rather than it being set from the context in which the hypercall is
> invoked.
>
> Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for migration,
> and restore it in kvm_arch_put_registers() appropriately.
>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
> ---
> target/i386/cpu.h | 2 ++
> target/i386/kvm/kvm.c | 19 +++++++++++
> target/i386/machine.c | 21 ++++++++++++
> target/i386/trace-events | 1 +
> target/i386/xen.c | 74 +++++++++++++++++++++++++++++++++++++---
> target/i386/xen.h | 1 +
> 6 files changed, 113 insertions(+), 5 deletions(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index c6c57baed5..109b2e5669 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -1788,6 +1788,8 @@ typedef struct CPUArchState {
> #endif
> #if defined(CONFIG_KVM)
> struct kvm_nested_state *nested_state;
> + uint64_t xen_vcpu_info_gpa;
> + uint64_t xen_vcpu_info_default_gpa;
> #endif
> #if defined(CONFIG_HVF)
> HVFX86LazyFlags hvf_lflags;
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index ebde6bc204..fa45e2f99a 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
> has_msr_hv_hypercall = true;
> }
>
> + env->xen_vcpu_info_gpa = UINT64_MAX;
> + env->xen_vcpu_info_default_gpa = UINT64_MAX;
There was an INVALID_GPA definition for shared info. Looks like we could
use it here too.
> +
> xen_version = kvm_arch_xen_version(MACHINE(qdev_get_machine()));
> if (xen_version) {
> #ifdef CONFIG_XEN_EMU
> @@ -4728,6 +4731,22 @@ int kvm_arch_put_registers(CPUState *cpu, int level)
> kvm_arch_set_tsc_khz(cpu);
> }
>
> +#ifdef CONFIG_XEN_EMU
> + if (level == KVM_PUT_FULL_STATE) {
> + uint64_t gpa = x86_cpu->env.xen_vcpu_info_gpa;
> + if (gpa == UINT64_MAX) {
> + gpa = x86_cpu->env.xen_vcpu_info_default_gpa;
> + }
> +
> + if (gpa != UINT64_MAX) {
> + ret = kvm_xen_set_vcpu_attr(cpu, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
> + if (ret < 0) {
> + return ret;
> + }
> + }
> + }
> +#endif
> +
> ret = kvm_getput_regs(x86_cpu, 1);
> if (ret < 0) {
> return ret;
[snip]
> @@ -195,19 +240,38 @@ static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit,
> }
> }
>
> +static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
> + uint64_t arg)
> +{
> + struct vcpu_register_vcpu_info rvi;
> + uint64_t gpa;
> +
> + if (!target)
> + return -ENOENT;
> +
> + if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
> + return -EFAULT;
> + }
> +
> + gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
Some sanity checks wouldn't go amiss here...
rvi.offset should:
a) be < TARGET_PAGE_SIZE, and
b) be aligned to vcpu_info_t size
Paul
> + async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
> + return 0;
> +}
> +
On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote:
> On 09/12/2022 09:56, David Woodhouse wrote:
> > From: Joao Martins <
> > joao.m.martins@oracle.com
> > >
> >
> > Handle the hypercall to set a per vcpu info, and also wire up the
> > default
> > vcpu_info in the shared_info page for the first 32 vCPUs.
> >
> > To avoid deadlock within KVM a vCPU thread must set its *own*
> > vcpu_info
> > rather than it being set from the context in which the hypercall is
> > invoked.
> >
> > Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for
> > migration,
> > and restore it in kvm_arch_put_registers() appropriately.
> >
> > Signed-off-by: Joao Martins <
> > joao.m.martins@oracle.com
> > >
> > Signed-off-by: David Woodhouse <
> > dwmw@amazon.co.uk
> > >
> > ---
> > target/i386/cpu.h | 2 ++
> > target/i386/kvm/kvm.c | 19 +++++++++++
> > target/i386/machine.c | 21 ++++++++++++
> > target/i386/trace-events | 1 +
> > target/i386/xen.c | 74
> > +++++++++++++++++++++++++++++++++++++---
> > target/i386/xen.h | 1 +
> > 6 files changed, 113 insertions(+), 5 deletions(-)
> >
> > diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> > index c6c57baed5..109b2e5669 100644
> > --- a/target/i386/cpu.h
> > +++ b/target/i386/cpu.h
> > @@ -1788,6 +1788,8 @@ typedef struct CPUArchState {
> > #endif
> > #if defined(CONFIG_KVM)
> > struct kvm_nested_state *nested_state;
> > + uint64_t xen_vcpu_info_gpa;
> > + uint64_t xen_vcpu_info_default_gpa;
> > #endif
> > #if defined(CONFIG_HVF)
> > HVFX86LazyFlags hvf_lflags;
> > diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> > index ebde6bc204..fa45e2f99a 100644
> > --- a/target/i386/kvm/kvm.c
> > +++ b/target/i386/kvm/kvm.c
> > @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > has_msr_hv_hypercall = true;
> > }
> >
> > + env->xen_vcpu_info_gpa = UINT64_MAX;
> > + env->xen_vcpu_info_default_gpa = UINT64_MAX;
>
>
> There was an INVALID_GPA definition for shared info. Looks like we
> could use it here too.
There was, and I started trying to use it, but it fell foul of the "is
this going to live in target/ or hw/ and who can include what from
where?" and I decided to just use UINT64_MAX for now and keep typing.
That will work out in the end, I'm sure.
> Some sanity checks wouldn't go amiss here...
>
> rvi.offset should:
> a) be < TARGET_PAGE_SIZE, and
> b) be aligned to vcpu_info_t size
Ack.
On 13/12/2022 00:13, David Woodhouse wrote:
> On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote:
>> On 09/12/2022 09:56, David Woodhouse wrote:
>>> From: Joao Martins <
>>> joao.m.martins@oracle.com
>>>>
>>>
>>> Handle the hypercall to set a per vcpu info, and also wire up the
>>> default
>>> vcpu_info in the shared_info page for the first 32 vCPUs.
>>>
>>> To avoid deadlock within KVM a vCPU thread must set its *own*
>>> vcpu_info
>>> rather than it being set from the context in which the hypercall is
>>> invoked.
>>>
>>> Add the vcpu_info (and default) GPA to the vmstate_x86_cpu for
>>> migration,
>>> and restore it in kvm_arch_put_registers() appropriately.
>>>
>>> Signed-off-by: Joao Martins <
>>> joao.m.martins@oracle.com
>>>>
>>> Signed-off-by: David Woodhouse <
>>> dwmw@amazon.co.uk
>>>>
>>> ---
>>> target/i386/cpu.h | 2 ++
>>> target/i386/kvm/kvm.c | 19 +++++++++++
>>> target/i386/machine.c | 21 ++++++++++++
>>> target/i386/trace-events | 1 +
>>> target/i386/xen.c | 74
>>> +++++++++++++++++++++++++++++++++++++---
>>> target/i386/xen.h | 1 +
>>> 6 files changed, 113 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
>>> index c6c57baed5..109b2e5669 100644
>>> --- a/target/i386/cpu.h
>>> +++ b/target/i386/cpu.h
>>> @@ -1788,6 +1788,8 @@ typedef struct CPUArchState {
>>> #endif
>>> #if defined(CONFIG_KVM)
>>> struct kvm_nested_state *nested_state;
>>> + uint64_t xen_vcpu_info_gpa;
>>> + uint64_t xen_vcpu_info_default_gpa;
>>> #endif
>>> #if defined(CONFIG_HVF)
>>> HVFX86LazyFlags hvf_lflags;
>>> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
>>> index ebde6bc204..fa45e2f99a 100644
>>> --- a/target/i386/kvm/kvm.c
>>> +++ b/target/i386/kvm/kvm.c
>>> @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
>>> has_msr_hv_hypercall = true;
>>> }
>>>
>>> + env->xen_vcpu_info_gpa = UINT64_MAX;
>>> + env->xen_vcpu_info_default_gpa = UINT64_MAX;
>>
>>
>> There was an INVALID_GPA definition for shared info. Looks like we
>> could use it here too.
>
> There was, and I started trying to use it, but it fell foul of the "is
> this going to live in target/ or hw/ and who can include what from
> where?" and I decided to just use UINT64_MAX for now and keep typing.
>
> That will work out in the end, I'm sure.
Hopefully
https://lore.kernel.org/lkml/20221209023622.274715-1-yu.c.zhang@linux.intel.com/
will help.
>
>> Some sanity checks wouldn't go amiss here...
>>
>> rvi.offset should:
>> a) be < TARGET_PAGE_SIZE, and
>> b) be aligned to vcpu_info_t size
>
> Ack.
On Wed, 2022-12-14 at 10:28 +0000, Paul Durrant wrote:
> On 13/12/2022 00:13, David Woodhouse wrote:
> > On Mon, 2022-12-12 at 14:58 +0000, Paul Durrant wrote:
> > > On 09/12/2022 09:56, David Woodhouse wrote:
> > > >
> > > > @@ -1811,6 +1811,9 @@ int kvm_arch_init_vcpu(CPUState *cs)
> > > > has_msr_hv_hypercall = true;
> > > > }
> > > >
> > > > + env->xen_vcpu_info_gpa = UINT64_MAX;
> > > > + env->xen_vcpu_info_default_gpa = UINT64_MAX;
> > >
> > >
> > > There was an INVALID_GPA definition for shared info. Looks like we
> > > could use it here too.
> >
> > There was, and I started trying to use it, but it fell foul of the "is
> > this going to live in target/ or hw/ and who can include what from
> > where?" and I decided to just use UINT64_MAX for now and keep typing.
> >
> > That will work out in the end, I'm sure.
>
> Hopefully
> https://lore.kernel.org/lkml/20221209023622.274715-1-yu.c.zhang@linux.intel.com/
>
> will help.

Those are kernel-internal; not in uapi headers. Although maybe they
*should* be uapi, at least for the KVM/Xen support because they are
actually part of the userspace ABI.

The kernel returns GFN_INVALID when queried about the shared info page
if it isn't set, or GPA_INVALID when queried about vcpu_info etc. (Those
are the same numerically but semantically subtly different, and it
hurts my brain that GFN_INVALID != GPA_INVALID >> PAGE_SHIFT.)

Userspace can also *set* those fields to Gxx_INVALID. Unlike the Xen
APIs which don't allow them to be turned off, we implement
SHUTDOWN_soft_reset in the userspace VMM so it needs to be able to turn
the shinfo areas off.
© 2016 - 2026 Red Hat, Inc.