On 10/01/2023 12:20, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
>
> Xen will "latch" the guest's 32-bit or 64-bit ("long mode") setting when
> the guest writes the MSR to fill in the hypercall page, or when the guest
> sets the event channel callback in HVM_PARAM_CALLBACK_IRQ.
>
> KVM handles the former and sets the kernel's long_mode flag accordingly.
> The latter will be handled in userspace. Keep them in sync by noticing
> when a hypercall is made in a mode that doesn't match qemu's idea of
> the guest mode, and resyncing from the kernel. Do that same sync right
> before serialization too, in case the guest has set the hypercall page
> but hasn't yet made a hypercall.
>
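For the HVM_PARAM_CALLBACK_IRQ side of the latch, I assume a later patch ends
up doing something along these lines in whichever handler processes
HVMOP_set_param -- just a sketch of my understanding; the handler name and
plumbing below are invented, only xen_set_long_mode() and the exit's longmode
flag come from this patch:

    static int handle_set_param_callback_irq(struct kvm_xen_exit *exit,
                                             uint64_t val)
    {
        /*
         * Xen latches the guest's 32/64-bit setting when the callback is
         * registered via HVM_PARAM_CALLBACK_IRQ, so mirror that here by
         * latching the mode of the hypercall that set the parameter.
         */
        xen_set_long_mode(exit->u.hcall.longmode);

        /* ... actual callback wiring left out of this sketch ... */
        return 0;
    }
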
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Reviewed-by: Paul Durrant <paul@xen.org>
... with one suggestion...
> ---
> hw/i386/kvm/xen_overlay.c | 65 +++++++++++++++++++++++++++++++++++++++
> hw/i386/kvm/xen_overlay.h | 4 +++
> target/i386/kvm/xen-emu.c | 12 ++++++++
> 3 files changed, 81 insertions(+)
>
> diff --git a/hw/i386/kvm/xen_overlay.c b/hw/i386/kvm/xen_overlay.c
> index 3e85bf912f..6fd63ff906 100644
> --- a/hw/i386/kvm/xen_overlay.c
> +++ b/hw/i386/kvm/xen_overlay.c
> @@ -44,6 +44,7 @@ struct XenOverlayState {
> MemoryRegion shinfo_mem;
> void *shinfo_ptr;
> uint64_t shinfo_gpa;
> + bool long_mode;
> };
>
> struct XenOverlayState *xen_overlay_singleton;
> @@ -96,9 +97,21 @@ static void xen_overlay_realize(DeviceState *dev, Error **errp)
>
> s->shinfo_ptr = memory_region_get_ram_ptr(&s->shinfo_mem);
> s->shinfo_gpa = INVALID_GPA;
> + s->long_mode = false;
> memset(s->shinfo_ptr, 0, XEN_PAGE_SIZE);
> }
>
> +static int xen_overlay_pre_save(void *opaque)
> +{
> + /*
> + * Fetch the kernel's idea of long_mode to avoid the race condition
> + * where the guest has set the hypercall page up in 64-bit mode but
> + * not yet made a hypercall by the time migration happens, so qemu
> + * hasn't yet noticed.
> + */
> + return xen_sync_long_mode();
> +}
> +
> static int xen_overlay_post_load(void *opaque, int version_id)
> {
> XenOverlayState *s = opaque;
> @@ -107,6 +120,9 @@ static int xen_overlay_post_load(void *opaque, int version_id)
> xen_overlay_map_page_locked(&s->shinfo_mem, s->shinfo_gpa);
> xen_overlay_set_be_shinfo(s->shinfo_gpa >> XEN_PAGE_SHIFT);
> }
> + if (s->long_mode) {
> + xen_set_long_mode(true);
> + }
>
> return 0;
> }
> @@ -121,9 +137,11 @@ static const VMStateDescription xen_overlay_vmstate = {
> .version_id = 1,
> .minimum_version_id = 1,
> .needed = xen_overlay_is_needed,
> + .pre_save = xen_overlay_pre_save,
> .post_load = xen_overlay_post_load,
> .fields = (VMStateField[]) {
> VMSTATE_UINT64(shinfo_gpa, XenOverlayState),
> + VMSTATE_BOOL(long_mode, XenOverlayState),
> VMSTATE_END_OF_LIST()
> }
> };
> @@ -198,3 +216,50 @@ void *xen_overlay_get_shinfo_ptr(void)
>
> return s->shinfo_ptr;
> }
> +
> +int xen_sync_long_mode(void)
> +{
> + int ret;
> + struct kvm_xen_hvm_attr xa = {
> + .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
> + };
> +
> + if (!xen_overlay_singleton) {
> + return -ENOENT;
> + }
> +
> + ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_GET_ATTR, &xa);
> + if (!ret) {
> + xen_overlay_singleton->long_mode = xa.u.long_mode;
> + }
> +
> + return ret;
> +}
> +
> +int xen_set_long_mode(bool long_mode)
> +{
> + int ret;
> + struct kvm_xen_hvm_attr xa = {
> + .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
> + .u.long_mode = long_mode,
> + };
> +
> + if (!xen_overlay_singleton) {
> + return -ENOENT;
> + }
> +
> + ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &xa);
> + if (!ret) {
> + xen_overlay_singleton->long_mode = xa.u.long_mode;
> + }
> +
> + return ret;
> +}
> +
> +bool xen_is_long_mode(void)
> +{
> + if (xen_overlay_singleton) {
> + return xen_overlay_singleton->long_mode;
> + }
> + return false;
return xen_overlay_singleton && xen_overlay_singleton->long_mode;
perhaps?
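i.e. the whole thing would collapse to just (untested, but matches the pattern
of the helpers above):

    bool xen_is_long_mode(void)
    {
        /* Default to false if the overlay device hasn't been created yet */
        return xen_overlay_singleton && xen_overlay_singleton->long_mode;
    }
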
> +}
> diff --git a/hw/i386/kvm/xen_overlay.h b/hw/i386/kvm/xen_overlay.h
> index 00cff05bb0..5c46a0b036 100644
> --- a/hw/i386/kvm/xen_overlay.h
> +++ b/hw/i386/kvm/xen_overlay.h
> @@ -17,4 +17,8 @@ void xen_overlay_create(void);
> int xen_overlay_map_shinfo_page(uint64_t gpa);
> void *xen_overlay_get_shinfo_ptr(void);
>
> +int xen_sync_long_mode(void);
> +int xen_set_long_mode(bool long_mode);
> +bool xen_is_long_mode(void);
> +
> #endif /* QEMU_XEN_OVERLAY_H */
> diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
> index 80005ea527..80f09f33df 100644
> --- a/target/i386/kvm/xen-emu.c
> +++ b/target/i386/kvm/xen-emu.c
> @@ -19,6 +19,8 @@
> #include "trace.h"
> #include "sysemu/runstate.h"
>
> +#include "hw/i386/kvm/xen_overlay.h"
> +
> #include "standard-headers/xen/version.h"
> #include "standard-headers/xen/sched.h"
>
> @@ -274,6 +276,16 @@ int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
> return -1;
> }
>
> + /*
> + * The kernel latches the guest 32/64 mode when the MSR is used to fill
> + * the hypercall page. So if we see a hypercall in a mode that doesn't
> + * match our own idea of the guest mode, fetch the kernel's idea of the
> + * "long mode" to remain in sync.
> + */
> + if (exit->u.hcall.longmode != xen_is_long_mode()) {
> + xen_sync_long_mode();
> + }
> +
> if (!do_kvm_xen_handle_exit(cpu, exit)) {
> /*
> * Some hypercalls will be deliberately "implemented" by returning