[PATCH v7 00/51] Xen support under KVM

David Woodhouse posted 51 patches 1 year, 3 months ago
Patches applied successfully (tree, apply log)
git fetch https://github.com/patchew-project/qemu tags/patchew/20230116215805.1123514-1-dwmw2@infradead.org
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Stefano Stabellini <sstabellini@kernel.org>, Anthony Perard <anthony.perard@citrix.com>, Paul Durrant <paul@xen.org>, "Dr. David Alan Gilbert" <dgilbert@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, Richard Henderson <richard.henderson@linaro.org>, Eduardo Habkost <eduardo@habkost.net>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, Thomas Huth <thuth@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Markus Armbruster <armbru@redhat.com>, Eric Blake <eblake@redhat.com>, Marcelo Tosatti <mtosatti@redhat.com>
accel/kvm/kvm-all.c                                |    2 +
accel/xen/xen-all.c                                |    2 +
hmp-commands.hx                                    |   29 +
hw/Kconfig                                         |    1 +
hw/i386/Kconfig                                    |    5 +
hw/i386/kvm/meson.build                            |   10 +
hw/i386/kvm/xen-stubs.c                            |   25 +
hw/i386/kvm/xen_evtchn.c                           | 1707 ++++++++++++++++++++
hw/i386/kvm/xen_evtchn.h                           |   68 +
hw/i386/kvm/xen_gnttab.c                           |  238 +++
hw/i386/kvm/xen_gnttab.h                           |   25 +
hw/i386/kvm/xen_overlay.c                          |  262 +++
hw/i386/kvm/xen_overlay.h                          |   26 +
hw/i386/kvm/xen_xenstore.c                         |  465 ++++++
hw/i386/kvm/xen_xenstore.h                         |   20 +
hw/i386/pc.c                                       |   23 +
hw/i386/xen/meson.build                            |    5 +-
hw/i386/xen/xen_platform.c                         |   57 +-
hw/xen/Kconfig                                     |    3 +
include/hw/i386/pc.h                               |    3 +
include/hw/xen/xen.h                               |   21 +-
include/standard-headers/xen/arch-x86/cpuid.h      |  118 ++
include/standard-headers/xen/arch-x86/xen-x86_32.h |  194 +++
include/standard-headers/xen/arch-x86/xen-x86_64.h |  241 +++
include/standard-headers/xen/arch-x86/xen.h        |  398 +++++
include/standard-headers/xen/event_channel.h       |  388 +++++
include/standard-headers/xen/features.h            |  143 ++
include/standard-headers/xen/grant_table.h         |  686 ++++++++
include/standard-headers/xen/hvm/hvm_op.h          |  395 +++++
include/standard-headers/xen/hvm/params.h          |  318 ++++
include/standard-headers/xen/io/blkif.h            |  722 +++++++++
include/standard-headers/xen/io/console.h          |   56 +
include/standard-headers/xen/io/fbif.h             |  176 ++
include/standard-headers/xen/io/kbdif.h            |  576 +++++++
include/standard-headers/xen/io/netif.h            | 1102 +++++++++++++
include/standard-headers/xen/io/protocols.h        |   42 +
include/standard-headers/xen/io/ring.h             |  495 ++++++
include/standard-headers/xen/io/usbif.h            |  425 +++++
include/standard-headers/xen/io/xenbus.h           |   80 +
include/standard-headers/xen/io/xs_wire.h          |  153 ++
include/standard-headers/xen/memory.h              |  754 +++++++++
include/standard-headers/xen/physdev.h             |  383 +++++
include/standard-headers/xen/sched.h               |  202 +++
include/standard-headers/xen/trace.h               |  341 ++++
include/standard-headers/xen/vcpu.h                |  248 +++
include/standard-headers/xen/version.h             |  113 ++
include/standard-headers/xen/xen-compat.h          |   46 +
include/standard-headers/xen/xen.h                 | 1049 ++++++++++++
include/sysemu/kvm_int.h                           |    3 +
include/sysemu/kvm_xen.h                           |   40 +
meson.build                                        |    1 +
monitor/misc.c                                     |    4 +
qapi/misc.json                                     |   91 ++
softmmu/globals.c                                  |    2 +-
target/i386/cpu.c                                  |    1 +
target/i386/cpu.h                                  |   19 +
target/i386/kvm/kvm.c                              |  208 ++-
target/i386/kvm/meson.build                        |    2 +
target/i386/kvm/trace-events                       |    6 +
target/i386/kvm/xen-compat.h                       |   51 +
target/i386/kvm/xen-emu.c                          | 1698 +++++++++++++++++++
target/i386/kvm/xen-emu.h                          |   33 +
target/i386/machine.c                              |   25 +
63 files changed, 15001 insertions(+), 24 deletions(-)
[PATCH v7 00/51] Xen support under KVM
Posted by David Woodhouse 1 year, 3 months ago
Even before this is merged, we've been able to use it to find bugs in
the Linux Xen guest support, and test the fixes.
https://lore.kernel.org/all/4bffa69a949bfdc92c4a18e5a1c3cbb3b94a0d32.camel@infradead.org/
https://lore.kernel.org/all/871qnunycr.ffs@tglx/

Version 7 is some trivial cosmetics, and a locking fix which mostly
paves the way for the PIRQ support which will be reposted as RFC on
top, shortly.

   qemu-system-x86_64 -serial mon:stdio -M pc -cpu host \
    -accel kvm,xen-version=0x4000a,kernel-irqchip=split -display none \
    -kernel vmlinuz-5.17.8-200.fc35.x86_64 \
    -append "console=ttyS0 root=/dev/sda1 xen_emul_unplug=never" \
    -d unimp -m 1G -smp 2 -device xen-platform \
    -drive file=/var/lib/libvirt/images/fedora28.qcow2,if=none,id=disk \
    -device ahci,id=ahci -device ide-hd,drive=disk,bus=ahci.0

v7: https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-kvm-7

 • Trivial review feedback and collected ack/review tags.

 • Only call qemu_set_irq() under the BQL, which means doing so from a BH
   in some circumstances.

v6: https://lore.kernel.org/qemu-devel/20230110122042.1562155-1-dwmw2@infradead.org/
    https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-kvm-6

 • Require split irqchip to ensure the GSI handling works correctly.

 • Rework monitor commands to be QMP-based.

 • Cache vcpu_info hva to avoid MemoryRegion refcount leaks.

 • Pull in more Xen headers to allow for later PV backend work.

 • Define __XEN_TOOLS__ in hw/xen/xen.h instead of littering C files with
   separate definitions of __XEN_INTERFACE_VERSION__.

 • Drop debugging hexdump from xenstore processing.

 • Minor fixes in event channel backend handling.

 • Drop "Refactor xen_be_init()" patch. It turns out we're going to do that
   all quite differently, so it's neither necessary nor sufficient.

v5: https://lore.kernel.org/qemu-devel/20221230121235.1282915-1-dwmw2@infradead.org/
    https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-kvm-5

 • Add backend implementation of event channel support, to parallel the
   libxenevtchn API used by existing backend drivers.

 • Add basic XenStore ring implementation, test migration and kexec.

 • Some kexec/soft reset fixes (clear port pending bits, kernel timer virq).
 
 • Fix race with setting the xen_callback_asserted flag before actually
   doing so, which could lead to it being *cleared* again before we even
   assert it... and leave it asserted for ever.

v4: https://lore.kernel.org/qemu-devel/20221221010623.1000191-1-dwmw2@infradead.org/
    https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-kvm-4

 • Add soft reset support near the beginning and thread it through the
   rest of the feature enablement.

 • Add PV timer	support	and advertise XENFEAT_safe_hvm_pvclock.

 • Add basic grant table mapping and [gs]et_version / query_size support.

 • Make	xen_platform device build (and work) without CONFIG_XEN.

 • Fix Xen HVM mode not to require --xen-attach.

v3: https://lore.kernel.org/qemu-devel/20221216004117.862106-1-dwmw2@infradead.org/
    https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-kvm-3

 • Switch back to xen-version as KVM accelerator property, other review
   feedback and bug fixes.

 • Fix Hyper-V coexistence (ick, calling kvm_xen_init() again because
   hyperv_enabled() doesn't return the right answer the first time).

 • Implement event channel support, including GSI/PCI_INTX callback.

 • Implement 32-bit guest support.

v2: https://lore.kernel.org/qemu-devel/20221209095612.689243-1-dwmw2@infradead.org/
    https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-kvm-2

 • Attempt to implement migration support; every Xen enlightenment is
   now recorded either from vmstate_x86_cpu or from a new sysdev device
   created for that purpose. And — I believe — correctly restored, in
   the right order, on vmload.

 • The shared_info page is created as a proper overlay instead of abusing
   the underlying guest page. This is important because Windows doesn't
   even select a GPA which had RAM behind it beforehand. This will be
   extended to handle the grant frames too, in the fullness of time.

 • Set vCPU attributes from the correct vCPU thread to avoid deadlocks.

 • Carefully copy the entire hypercall argument structure from userspace
   instead of assuming that it's contiguous in HVA space.

 • Distinguish between "handled but intentionally returns -ENOSYS" and
   "no idea what that was" in hypercalls, allowing us to emit a
   GUEST_ERROR (actually, shouldn't that change to UNIMP?) on the
   latter. Experience shows that to we'll end up having to intentionally
   return -ENOSYS to a bunch of weird crap that ancient guests still
   attempt to use, including XenServer local hacks that nobody even
   remembers what they were (hvmop 0x101, anyone? Some old Windows
   PV driver appears to be trying to use it...).

 * Drop the '+xen' CPU property and present Xen CPUID instead of KVM
   unconditionally when running in Xen mode. Make the Xen CPUID coexist
   with Hyper-V CPUID as it should, though.

 • Add XEN_EMU and XENFV_MACHINE (the latter to be XEN_EMU||XEN) config
   options. Some more work on this, and the incestuous relationships
   between the KVM target code and the 'platform' code, is going to be
   required but it's probably better to get on with implementing the
   real code so we can see those interactions in all their glory,
   before losing too much sleep over the details here.

 • Drop the GSI-2 hack, and also the patch which made the PCI platform
   device have real RAM (which isn't needed now we have overlays, qv).

 • Drop the XenState and XenVcpuState from KVMState and CPUArchState
   respectively. The Xen-specific fields are natively included in
   CPUArchState now though, for migration purposes. And we don't
   keep a host pointer to the shared_info or vcpu_info at all any
   more. With the kernel doing everything for us, we don't actually
   need them.

v1: https://lore.kernel.org/qemu-devel/20221205173137.607044-1-dwmw2@infradead.org/
    https://git.infradead.org/users/dwmw2/qemu.git/shortlog/refs/heads/xenfv-kvm-1

v0: https://github.com/jpemartins/qemu/commits/xen-shim-rfc (Joao et al.)

Ankur Arora (2):
      i386/xen: implement HVMOP_set_evtchn_upcall_vector
      i386/xen: implement HVMOP_set_param

David Woodhouse (32):
      xen: add CONFIG_XENFV_MACHINE and CONFIG_XEN_EMU options for Xen emulation
      xen: Add XEN_DISABLED mode and make it default
      i386/kvm: Add xen-version KVM accelerator property and init KVM Xen support
      i386/hvm: Set Xen vCPU ID in KVM
      i386/xen: Implement SCHEDOP_poll and SCHEDOP_yield
      hw/xen: Add xen_overlay device for emulating shared xenheap pages
      i386/xen: add pc_machine_kvm_type to initialize XEN_EMULATE mode
      i386/xen: manage and save/restore Xen guest long_mode setting
      i386/xen: implement XENMEM_add_to_physmap_batch
      hw/xen: Add xen_evtchn device for event channel emulation
      i386/xen: Add support for Xen event channel delivery to vCPU
      hw/xen: Implement EVTCHNOP_status
      hw/xen: Implement EVTCHNOP_close
      hw/xen: Implement EVTCHNOP_unmask
      hw/xen: Implement EVTCHNOP_bind_virq
      hw/xen: Implement EVTCHNOP_bind_ipi
      hw/xen: Implement EVTCHNOP_send
      hw/xen: Implement EVTCHNOP_alloc_unbound
      hw/xen: Implement EVTCHNOP_bind_interdomain
      hw/xen: Implement EVTCHNOP_bind_vcpu
      hw/xen: Implement EVTCHNOP_reset
      hw/xen: Support HVM_PARAM_CALLBACK_TYPE_GSI callback
      hw/xen: Support HVM_PARAM_CALLBACK_TYPE_PCI_INTX callback
      kvm/i386: Add xen-gnttab-max-frames property
      hw/xen: Add xen_gnttab device for grant table emulation
      hw/xen: Support mapping grant frames
      i386/xen: Implement HYPERVISOR_grant_table_op and GNTTABOP_[gs]et_verson
      hw/xen: Implement GNTTABOP_query_size
      i386/xen: Reserve Xen special pages for console, xenstore rings
      hw/xen: Add backend implementation of interdomain event channel support
      hw/xen: Add xen_xenstore device for xenstore emulation
      hw/xen: Add basic ring handling to xenstore

Joao Martins (17):
      include: import Xen public headers to include/standard-headers/
      i386/kvm: handle Xen HVM cpuid leaves
      xen-platform: exclude vfio-pci from the PCI platform unplug
      xen-platform: allow its creation with XEN_EMULATE mode
      i386/xen: handle guest hypercalls
      i386/xen: implement HYPERVISOR_xen_version
      i386/xen: implement HYPERVISOR_sched_op, SCHEDOP_shutdown
      i386/xen: implement HYPERVISOR_memory_op
      i386/xen: implement HYPERVISOR_hvm_op
      i386/xen: implement HYPERVISOR_vcpu_op
      i386/xen: handle VCPUOP_register_vcpu_info
      i386/xen: handle VCPUOP_register_vcpu_time_info
      i386/xen: handle VCPUOP_register_runstate_memory_area
      i386/xen: implement HYPERVISOR_event_channel_op
      i386/xen: add monitor commands to test event injection
      i386/xen: handle PV timer hypercalls
      i386/xen: handle HVMOP_get_param

 accel/kvm/kvm-all.c                                |    2 +
 accel/xen/xen-all.c                                |    2 +
 hmp-commands.hx                                    |   29 +
 hw/Kconfig                                         |    1 +
 hw/i386/Kconfig                                    |    5 +
 hw/i386/kvm/meson.build                            |   10 +
 hw/i386/kvm/xen-stubs.c                            |   25 +
 hw/i386/kvm/xen_evtchn.c                           | 1707 ++++++++++++++++++++
 hw/i386/kvm/xen_evtchn.h                           |   68 +
 hw/i386/kvm/xen_gnttab.c                           |  238 +++
 hw/i386/kvm/xen_gnttab.h                           |   25 +
 hw/i386/kvm/xen_overlay.c                          |  262 +++
 hw/i386/kvm/xen_overlay.h                          |   26 +
 hw/i386/kvm/xen_xenstore.c                         |  465 ++++++
 hw/i386/kvm/xen_xenstore.h                         |   20 +
 hw/i386/pc.c                                       |   23 +
 hw/i386/xen/meson.build                            |    5 +-
 hw/i386/xen/xen_platform.c                         |   57 +-
 hw/xen/Kconfig                                     |    3 +
 include/hw/i386/pc.h                               |    3 +
 include/hw/xen/xen.h                               |   21 +-
 include/standard-headers/xen/arch-x86/cpuid.h      |  118 ++
 include/standard-headers/xen/arch-x86/xen-x86_32.h |  194 +++
 include/standard-headers/xen/arch-x86/xen-x86_64.h |  241 +++
 include/standard-headers/xen/arch-x86/xen.h        |  398 +++++
 include/standard-headers/xen/event_channel.h       |  388 +++++
 include/standard-headers/xen/features.h            |  143 ++
 include/standard-headers/xen/grant_table.h         |  686 ++++++++
 include/standard-headers/xen/hvm/hvm_op.h          |  395 +++++
 include/standard-headers/xen/hvm/params.h          |  318 ++++
 include/standard-headers/xen/io/blkif.h            |  722 +++++++++
 include/standard-headers/xen/io/console.h          |   56 +
 include/standard-headers/xen/io/fbif.h             |  176 ++
 include/standard-headers/xen/io/kbdif.h            |  576 +++++++
 include/standard-headers/xen/io/netif.h            | 1102 +++++++++++++
 include/standard-headers/xen/io/protocols.h        |   42 +
 include/standard-headers/xen/io/ring.h             |  495 ++++++
 include/standard-headers/xen/io/usbif.h            |  425 +++++
 include/standard-headers/xen/io/xenbus.h           |   80 +
 include/standard-headers/xen/io/xs_wire.h          |  153 ++
 include/standard-headers/xen/memory.h              |  754 +++++++++
 include/standard-headers/xen/physdev.h             |  383 +++++
 include/standard-headers/xen/sched.h               |  202 +++
 include/standard-headers/xen/trace.h               |  341 ++++
 include/standard-headers/xen/vcpu.h                |  248 +++
 include/standard-headers/xen/version.h             |  113 ++
 include/standard-headers/xen/xen-compat.h          |   46 +
 include/standard-headers/xen/xen.h                 | 1049 ++++++++++++
 include/sysemu/kvm_int.h                           |    3 +
 include/sysemu/kvm_xen.h                           |   40 +
 meson.build                                        |    1 +
 monitor/misc.c                                     |    4 +
 qapi/misc.json                                     |   91 ++
 softmmu/globals.c                                  |    2 +-
 target/i386/cpu.c                                  |    1 +
 target/i386/cpu.h                                  |   19 +
 target/i386/kvm/kvm.c                              |  208 ++-
 target/i386/kvm/meson.build                        |    2 +
 target/i386/kvm/trace-events                       |    6 +
 target/i386/kvm/xen-compat.h                       |   51 +
 target/i386/kvm/xen-emu.c                          | 1698 +++++++++++++++++++
 target/i386/kvm/xen-emu.h                          |   33 +
 target/i386/machine.c                              |   25 +
 63 files changed, 15001 insertions(+), 24 deletions(-)




[RFC PATCH v7bis 00/19] Emulated Xen PV backend and PIRQ support
Posted by David Woodhouse 1 year, 3 months ago
This is what's in flux on top of the series posted as v7.

It contains both the PV back end support and the PIRQ support which were
previous posted in separate series. The former is mostly unchanged, and
we still need to either port that C++ XenStore implementation or make
Xen's own xenstored stop crashing when run without a real Xen. Although
the basic ops table support is probably sane enough to go in; it just
wants testing on real Xen.

As noted, it would have been nice to remove the old Xen headers in the 
first phase of patches with the initial platform support, but it really 
does need the header untangling which occurs as we go through the 
backend and introduce the generic ops support.

The PIRQ support is significantly reworked and I think I've fixed up
the locking issues, and tested with both emulated and VFIO passthrough
devices for MSI and MSI-X to PIRQ mappings.

I do need to disable the KVM irqfd routing if there are VFIO devices and 
the kernel is old enough not to support native evtchn delivery. I have 
plans around that¹ but was hoping to avoid them becoming a dependency of 
any of the Xen suppport, so I'll probably just make it refuse that 
configuration on older kernels.

¹ https://lore.kernel.org/qemu-devel/aaef9961d210ac1873153bf3cf01d984708744fc.camel@infradead.org/


David Woodhouse (18):
      hw/xen: Add evtchn operations to allow redirection to internal emulation
      hw/xen: Add emulated evtchn ops
      hw/xen: Add gnttab operations to allow redirection to internal emulation
      hw/xen: Pass grant ref to gnttab unmap
      hw/xen: Add foreignmem operations to allow redirection to internal emulation
      hw/xen: Move xenstore_store_pv_console_info to xen_console.c
      hw/xen: Use XEN_PAGE_SIZE in PV backend drivers
      hw/xen: Rename xen_common.h to xen_native.h
      hw/xen: Build PV backend drivers for XENFV_MACHINE
      hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore
      hw/xen: Add backend implementation of grant table operations
      hw/xen: Implement soft reset for emulated gnttab
      hw/xen: Remove old version of Xen headers
      i386/xen: Initialize XenBus and legacy backends from pc_init1()
      i386/xen: Implement HYPERVISOR_physdev_op
      hw/xen: Implement emulated PIRQ hypercall support
      hw/xen: Support GSI mapping to PIRQ
      hw/xen: Support MSI mapping to PIRQ

Paul Durrant (1):
      hw/xen: Add xenstore operations to allow redirection to internal emulation

 accel/xen/xen-all.c                           |   69 +-
 hw/9pfs/meson.build                           |    2 +-
 hw/9pfs/xen-9p-backend.c                      |   32 +-
 hw/9pfs/xen-9pfs.h                            |    4 +-
 hw/block/dataplane/meson.build                |    2 +-
 hw/block/dataplane/xen-block.c                |   12 +-
 hw/block/meson.build                          |    2 +-
 hw/block/xen-block.c                          |    1 -
 hw/block/xen_blkif.h                          |    5 +-
 hw/char/meson.build                           |    2 +-
 hw/char/xen_console.c                         |   59 +-
 hw/display/meson.build                        |    2 +-
 hw/display/xenfb.c                            |   38 +-
 hw/i386/kvm/trace-events                      |    5 +
 hw/i386/kvm/trace.h                           |    1 +
 hw/i386/kvm/xen-stubs.c                       |   15 +
 hw/i386/kvm/xen_evtchn.c                      |  596 ++++++++++++++-
 hw/i386/kvm/xen_evtchn.h                      |   23 +
 hw/i386/kvm/xen_gnttab.c                      |  320 +++++++-
 hw/i386/kvm/xen_gnttab.h                      |    1 +
 hw/i386/kvm/xen_xenstore.c                    |   13 +-
 hw/i386/pc_piix.c                             |   17 +
 hw/i386/x86.c                                 |   16 +
 hw/i386/xen/xen-hvm.c                         |   45 +-
 hw/i386/xen/xen-mapcache.c                    |    2 +-
 hw/i386/xen/xen_platform.c                    |    7 +-
 hw/net/xen_nic.c                              |   27 +-
 hw/pci/msi.c                                  |   11 +
 hw/pci/msix.c                                 |    7 +
 hw/pci/pci.c                                  |   17 +
 hw/usb/meson.build                            |    2 +-
 hw/usb/xen-usb.c                              |   31 +-
 hw/xen/meson.build                            |    6 +-
 hw/xen/trace-events                           |    2 +-
 hw/xen/xen-bus-helper.c                       |   61 +-
 hw/xen/xen-bus.c                              |  394 ++--------
 hw/xen/xen-legacy-backend.c                   |  256 ++-----
 hw/xen/xen-operations.c                       |  487 ++++++++++++
 hw/xen/xen_devconfig.c                        |    4 +-
 hw/xen/xen_pt.c                               |    2 +-
 hw/xen/xen_pt.h                               |    2 +-
 hw/xen/xen_pt_config_init.c                   |    2 +-
 hw/xen/xen_pt_graphics.c                      |    1 -
 hw/xen/xen_pt_msi.c                           |    4 +-
 hw/xen/xen_pvdev.c                            |   63 +-
 include/hw/pci/msi.h                          |    1 +
 include/hw/xen/interface/grant_table.h        |   36 -
 include/hw/xen/interface/io/blkif.h           |  712 -----------------
 include/hw/xen/interface/io/console.h         |   46 --
 include/hw/xen/interface/io/fbif.h            |  156 ----
 include/hw/xen/interface/io/kbdif.h           |  566 --------------
 include/hw/xen/interface/io/netif.h           | 1010 -------------------------
 include/hw/xen/interface/io/protocols.h       |   42 -
 include/hw/xen/interface/io/ring.h            |  474 ------------
 include/hw/xen/interface/io/usbif.h           |  254 -------
 include/hw/xen/interface/io/xenbus.h          |   70 --
 include/hw/xen/xen-bus-helper.h               |   25 +-
 include/hw/xen/xen-bus.h                      |   21 +-
 include/hw/xen/xen-legacy-backend.h           |   24 +-
 include/hw/xen/xen.h                          |   24 +-
 include/hw/xen/xen_backend_ops.h              |  393 ++++++++++
 include/hw/xen/{xen_common.h => xen_native.h} |   75 +-
 include/hw/xen/xen_pvdev.h                    |    6 +-
 meson.build                                   |    1 +
 softmmu/globals.c                             |    4 +
 target/i386/kvm/kvm.c                         |   12 +-
 target/i386/kvm/kvm_i386.h                    |    2 +
 target/i386/kvm/xen-compat.h                  |   19 +
 target/i386/kvm/xen-emu.c                     |  141 +++-
 69 files changed, 2491 insertions(+), 4293 deletions(-)




[RFC PATCH v7bis 01/19] hw/xen: Add evtchn operations to allow redirection to internal emulation
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
 hw/9pfs/xen-9p-backend.c            |  24 +++---
 hw/i386/xen/xen-hvm.c               |  28 ++++---
 hw/xen/meson.build                  |   1 +
 hw/xen/xen-bus.c                    |  22 +++---
 hw/xen/xen-legacy-backend.c         |   8 +-
 hw/xen/xen-operations.c             |  71 +++++++++++++++++
 hw/xen/xen_pvdev.c                  |  12 +--
 include/hw/xen/xen-bus.h            |   1 +
 include/hw/xen/xen-legacy-backend.h |   1 +
 include/hw/xen/xen_backend_ops.h    | 118 ++++++++++++++++++++++++++++
 include/hw/xen/xen_common.h         |  12 ---
 include/hw/xen/xen_pvdev.h          |   1 +
 softmmu/globals.c                   |   1 +
 13 files changed, 243 insertions(+), 57 deletions(-)
 create mode 100644 hw/xen/xen-operations.c
 create mode 100644 include/hw/xen/xen_backend_ops.h

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 65c4979c3c..864bdaf952 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -241,7 +241,7 @@ static void xen_9pfs_push_and_notify(V9fsPDU *pdu)
     xen_wmb();
 
     ring->inprogress = false;
-    xenevtchn_notify(ring->evtchndev, ring->local_port);
+    qemu_xen_evtchn_notify(ring->evtchndev, ring->local_port);
 
     qemu_bh_schedule(ring->bh);
 }
@@ -324,8 +324,8 @@ static void xen_9pfs_evtchn_event(void *opaque)
     Xen9pfsRing *ring = opaque;
     evtchn_port_t port;
 
-    port = xenevtchn_pending(ring->evtchndev);
-    xenevtchn_unmask(ring->evtchndev, port);
+    port = qemu_xen_evtchn_pending(ring->evtchndev);
+    qemu_xen_evtchn_unmask(ring->evtchndev, port);
 
     qemu_bh_schedule(ring->bh);
 }
@@ -337,10 +337,10 @@ static void xen_9pfs_disconnect(struct XenLegacyDevice *xendev)
 
     for (i = 0; i < xen_9pdev->num_rings; i++) {
         if (xen_9pdev->rings[i].evtchndev != NULL) {
-            qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
-                    NULL, NULL, NULL);
-            xenevtchn_unbind(xen_9pdev->rings[i].evtchndev,
-                             xen_9pdev->rings[i].local_port);
+            qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+                                NULL, NULL, NULL);
+            qemu_xen_evtchn_unbind(xen_9pdev->rings[i].evtchndev,
+                                   xen_9pdev->rings[i].local_port);
             xen_9pdev->rings[i].evtchndev = NULL;
         }
     }
@@ -447,12 +447,12 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
         xen_9pdev->rings[i].inprogress = false;
 
 
-        xen_9pdev->rings[i].evtchndev = xenevtchn_open(NULL, 0);
+        xen_9pdev->rings[i].evtchndev = qemu_xen_evtchn_open();
         if (xen_9pdev->rings[i].evtchndev == NULL) {
             goto out;
         }
-        qemu_set_cloexec(xenevtchn_fd(xen_9pdev->rings[i].evtchndev));
-        xen_9pdev->rings[i].local_port = xenevtchn_bind_interdomain
+        qemu_set_cloexec(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev));
+        xen_9pdev->rings[i].local_port = qemu_xen_evtchn_bind_interdomain
                                             (xen_9pdev->rings[i].evtchndev,
                                              xendev->dom,
                                              xen_9pdev->rings[i].evtchn);
@@ -463,8 +463,8 @@ static int xen_9pfs_connect(struct XenLegacyDevice *xendev)
             goto out;
         }
         xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
-        qemu_set_fd_handler(xenevtchn_fd(xen_9pdev->rings[i].evtchndev),
-                xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
+        qemu_set_fd_handler(qemu_xen_evtchn_fd(xen_9pdev->rings[i].evtchndev),
+                            xen_9pfs_evtchn_event, NULL, &xen_9pdev->rings[i]);
     }
 
     xen_9pdev->security_model = xenstore_read_be_str(xendev, "security_model");
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index e4293d6d66..38146efb5a 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -18,6 +18,7 @@
 #include "hw/irq.h"
 #include "hw/hw.h"
 #include "hw/i386/apic-msidef.h"
+#include "hw/xen/xen_backend_ops.h"
 #include "hw/xen/xen_common.h"
 #include "hw/xen/xen-legacy-backend.h"
 #include "hw/xen/xen-bus.h"
@@ -761,7 +762,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
     int i;
     evtchn_port_t port;
 
-    port = xenevtchn_pending(state->xce_handle);
+    port = qemu_xen_evtchn_pending(state->xce_handle);
     if (port == state->bufioreq_local_port) {
         timer_mod(state->buffered_io_timer,
                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
@@ -780,7 +781,7 @@ static ioreq_t *cpu_get_ioreq(XenIOState *state)
         }
 
         /* unmask the wanted port again */
-        xenevtchn_unmask(state->xce_handle, port);
+        qemu_xen_evtchn_unmask(state->xce_handle, port);
 
         /* get the io packet from shared memory */
         state->send_vcpu = i;
@@ -1147,7 +1148,7 @@ static void handle_buffered_io(void *opaque)
                 BUFFER_IO_MAX_DELAY + qemu_clock_get_ms(QEMU_CLOCK_REALTIME));
     } else {
         timer_del(state->buffered_io_timer);
-        xenevtchn_unmask(state->xce_handle, state->bufioreq_local_port);
+        qemu_xen_evtchn_unmask(state->xce_handle, state->bufioreq_local_port);
     }
 }
 
@@ -1196,8 +1197,8 @@ static void cpu_handle_ioreq(void *opaque)
         }
 
         req->state = STATE_IORESP_READY;
-        xenevtchn_notify(state->xce_handle,
-                         state->ioreq_local_port[state->send_vcpu]);
+        qemu_xen_evtchn_notify(state->xce_handle,
+                               state->ioreq_local_port[state->send_vcpu]);
     }
 }
 
@@ -1206,7 +1207,7 @@ static void xen_main_loop_prepare(XenIOState *state)
     int evtchn_fd = -1;
 
     if (state->xce_handle != NULL) {
-        evtchn_fd = xenevtchn_fd(state->xce_handle);
+        evtchn_fd = qemu_xen_evtchn_fd(state->xce_handle);
     }
 
     state->buffered_io_timer = timer_new_ms(QEMU_CLOCK_REALTIME, handle_buffered_io,
@@ -1249,7 +1250,7 @@ static void xen_exit_notifier(Notifier *n, void *data)
         xenforeignmemory_unmap_resource(xen_fmem, state->fres);
     }
 
-    xenevtchn_close(state->xce_handle);
+    qemu_xen_evtchn_close(state->xce_handle);
     xs_daemon_close(state->xenstore);
 }
 
@@ -1397,9 +1398,11 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
     xen_pfn_t ioreq_pfn;
     XenIOState *state;
 
+    setup_xen_backend_ops();
+
     state = g_new0(XenIOState, 1);
 
-    state->xce_handle = xenevtchn_open(NULL, 0);
+    state->xce_handle = qemu_xen_evtchn_open();
     if (state->xce_handle == NULL) {
         perror("xen: event channel open");
         goto err;
@@ -1463,8 +1466,9 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
 
     /* FIXME: how about if we overflow the page here? */
     for (i = 0; i < max_cpus; i++) {
-        rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
-                                        xen_vcpu_eport(state->shared_page, i));
+        rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
+                                              xen_vcpu_eport(state->shared_page,
+                                                             i));
         if (rc == -1) {
             error_report("shared evtchn %d bind error %d", i, errno);
             goto err;
@@ -1472,8 +1476,8 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
         state->ioreq_local_port[i] = rc;
     }
 
-    rc = xenevtchn_bind_interdomain(state->xce_handle, xen_domid,
-                                    state->bufioreq_remote_port);
+    rc = qemu_xen_evtchn_bind_interdomain(state->xce_handle, xen_domid,
+                                          state->bufioreq_remote_port);
     if (rc == -1) {
         error_report("buffered evtchn bind error %d", errno);
         goto err;
diff --git a/hw/xen/meson.build b/hw/xen/meson.build
index ae0ace3046..f195bbd25c 100644
--- a/hw/xen/meson.build
+++ b/hw/xen/meson.build
@@ -5,6 +5,7 @@ softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
   'xen-legacy-backend.c',
   'xen_devconfig.c',
   'xen_pvdev.c',
+  'xen-operations.c',
 ))
 
 xen_specific_ss = ss.source_set()
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index df3f6b9ae0..d0b1ae93da 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -1095,12 +1095,12 @@ static bool xen_device_poll(void *opaque)
 static void xen_device_event(void *opaque)
 {
     XenEventChannel *channel = opaque;
-    unsigned long port = xenevtchn_pending(channel->xeh);
+    unsigned long port = qemu_xen_evtchn_pending(channel->xeh);
 
     if (port == channel->local_port) {
         xen_device_poll(channel);
 
-        xenevtchn_unmask(channel->xeh, port);
+        qemu_xen_evtchn_unmask(channel->xeh, port);
     }
 }
 
@@ -1115,11 +1115,11 @@ void xen_device_set_event_channel_context(XenDevice *xendev,
     }
 
     if (channel->ctx)
-        aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+        aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
                            NULL, NULL, NULL, NULL, NULL);
 
     channel->ctx = ctx;
-    aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+    aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
                        xen_device_event, NULL, xen_device_poll, NULL, channel);
 }
 
@@ -1131,13 +1131,13 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
     XenEventChannel *channel = g_new0(XenEventChannel, 1);
     xenevtchn_port_or_error_t local_port;
 
-    channel->xeh = xenevtchn_open(NULL, 0);
+    channel->xeh = qemu_xen_evtchn_open();
     if (!channel->xeh) {
         error_setg_errno(errp, errno, "failed xenevtchn_open");
         goto fail;
     }
 
-    local_port = xenevtchn_bind_interdomain(channel->xeh,
+    local_port = qemu_xen_evtchn_bind_interdomain(channel->xeh,
                                             xendev->frontend_id,
                                             port);
     if (local_port < 0) {
@@ -1160,7 +1160,7 @@ XenEventChannel *xen_device_bind_event_channel(XenDevice *xendev,
 
 fail:
     if (channel->xeh) {
-        xenevtchn_close(channel->xeh);
+        qemu_xen_evtchn_close(channel->xeh);
     }
 
     g_free(channel);
@@ -1177,7 +1177,7 @@ void xen_device_notify_event_channel(XenDevice *xendev,
         return;
     }
 
-    if (xenevtchn_notify(channel->xeh, channel->local_port) < 0) {
+    if (qemu_xen_evtchn_notify(channel->xeh, channel->local_port) < 0) {
         error_setg_errno(errp, errno, "xenevtchn_notify failed");
     }
 }
@@ -1193,14 +1193,14 @@ void xen_device_unbind_event_channel(XenDevice *xendev,
 
     QLIST_REMOVE(channel, list);
 
-    aio_set_fd_handler(channel->ctx, xenevtchn_fd(channel->xeh), true,
+    aio_set_fd_handler(channel->ctx, qemu_xen_evtchn_fd(channel->xeh), true,
                        NULL, NULL, NULL, NULL, NULL);
 
-    if (xenevtchn_unbind(channel->xeh, channel->local_port) < 0) {
+    if (qemu_xen_evtchn_unbind(channel->xeh, channel->local_port) < 0) {
         error_setg_errno(errp, errno, "xenevtchn_unbind failed");
     }
 
-    xenevtchn_close(channel->xeh);
+    qemu_xen_evtchn_close(channel->xeh);
     g_free(channel);
 }
 
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 085fd31ef7..23e8a6fbd8 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -294,13 +294,13 @@ static struct XenLegacyDevice *xen_be_get_xendev(const char *type, int dom,
     xendev->debug      = debug;
     xendev->local_port = -1;
 
-    xendev->evtchndev = xenevtchn_open(NULL, 0);
+    xendev->evtchndev = qemu_xen_evtchn_open();
     if (xendev->evtchndev == NULL) {
         xen_pv_printf(NULL, 0, "can't open evtchn device\n");
         qdev_unplug(DEVICE(xendev), NULL);
         return NULL;
     }
-    qemu_set_cloexec(xenevtchn_fd(xendev->evtchndev));
+    qemu_set_cloexec(qemu_xen_evtchn_fd(xendev->evtchndev));
 
     xen_pv_insert_xendev(xendev);
 
@@ -763,14 +763,14 @@ int xen_be_bind_evtchn(struct XenLegacyDevice *xendev)
     if (xendev->local_port != -1) {
         return 0;
     }
-    xendev->local_port = xenevtchn_bind_interdomain
+    xendev->local_port = qemu_xen_evtchn_bind_interdomain
         (xendev->evtchndev, xendev->dom, xendev->remote_port);
     if (xendev->local_port == -1) {
         xen_pv_printf(xendev, 0, "xenevtchn_bind_interdomain failed\n");
         return -1;
     }
     xen_pv_printf(xendev, 2, "bind evtchn port %d\n", xendev->local_port);
-    qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev),
+    qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev),
                         xen_pv_evtchn_event, NULL, xendev);
     return 0;
 }
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
new file mode 100644
index 0000000000..1a959d89e8
--- /dev/null
+++ b/hw/xen/xen-operations.c
@@ -0,0 +1,71 @@
+/*
+ * QEMU Xen backend support: Operations for true Xen
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu/osdep.h"
+#include "qapi/error.h"
+
+#include "hw/xen/xen_backend_ops.h"
+#include "hw/xen/xen_common.h"
+
+/*
+ * If we have new enough libxenctrl then we do not want/need these compat
+ * interfaces, despite what the user supplied cflags might say. They
+ * must be undefined before including xenctrl.h
+ */
+#undef XC_WANT_COMPAT_EVTCHN_API
+
+#include <xenctrl.h>
+
+/*
+ * We don't support Xen prior to 4.2.0.
+ */
+
+/* Xen 4.2 through 4.6 */
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
+
+typedef xc_evtchn xenevtchn_handle;
+typedef evtchn_port_or_error_t xenevtchn_port_or_error_t;
+
+#define xenevtchn_open(l, f) xc_evtchn_open(l, f);
+#define xenevtchn_close(h) xc_evtchn_close(h)
+#define xenevtchn_fd(h) xc_evtchn_fd(h)
+#define xenevtchn_pending(h) xc_evtchn_pending(h)
+#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p)
+#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p)
+#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p)
+#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p)
+
+#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
+
+#include <xenevtchn.h>
+
+#endif
+
+static xenevtchn_handle *libxenevtchn_backend_open(void)
+{
+    return xenevtchn_open(NULL, 0);
+}
+
+struct evtchn_backend_ops libxenevtchn_backend_ops = {
+    .open = libxenevtchn_backend_open,
+    .close = xenevtchn_close,
+    .bind_interdomain = xenevtchn_bind_interdomain,
+    .unbind = xenevtchn_unbind,
+    .get_fd = xenevtchn_fd,
+    .notify = xenevtchn_notify,
+    .unmask = xenevtchn_unmask,
+    .pending = xenevtchn_pending,
+};
+
+void setup_xen_backend_ops(void)
+{
+    xen_evtchn_ops = &libxenevtchn_backend_ops;
+}
diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c
index 1a5177b354..86a2c8e567 100644
--- a/hw/xen/xen_pvdev.c
+++ b/hw/xen/xen_pvdev.c
@@ -238,14 +238,14 @@ void xen_pv_evtchn_event(void *opaque)
     struct XenLegacyDevice *xendev = opaque;
     evtchn_port_t port;
 
-    port = xenevtchn_pending(xendev->evtchndev);
+    port = qemu_xen_evtchn_pending(xendev->evtchndev);
     if (port != xendev->local_port) {
         xen_pv_printf(xendev, 0,
                       "xenevtchn_pending returned %d (expected %d)\n",
                       port, xendev->local_port);
         return;
     }
-    xenevtchn_unmask(xendev->evtchndev, port);
+    qemu_xen_evtchn_unmask(xendev->evtchndev, port);
 
     if (xendev->ops->event) {
         xendev->ops->event(xendev);
@@ -257,15 +257,15 @@ void xen_pv_unbind_evtchn(struct XenLegacyDevice *xendev)
     if (xendev->local_port == -1) {
         return;
     }
-    qemu_set_fd_handler(xenevtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
-    xenevtchn_unbind(xendev->evtchndev, xendev->local_port);
+    qemu_set_fd_handler(qemu_xen_evtchn_fd(xendev->evtchndev), NULL, NULL, NULL);
+    qemu_xen_evtchn_unbind(xendev->evtchndev, xendev->local_port);
     xen_pv_printf(xendev, 2, "unbind evtchn port %d\n", xendev->local_port);
     xendev->local_port = -1;
 }
 
 int xen_pv_send_notify(struct XenLegacyDevice *xendev)
 {
-    return xenevtchn_notify(xendev->evtchndev, xendev->local_port);
+    return qemu_xen_evtchn_notify(xendev->evtchndev, xendev->local_port);
 }
 
 /* ------------------------------------------------------------- */
@@ -306,7 +306,7 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev)
     }
 
     if (xendev->evtchndev != NULL) {
-        xenevtchn_close(xendev->evtchndev);
+        qemu_xen_evtchn_close(xendev->evtchndev);
     }
     if (xendev->gnttabdev != NULL) {
         xengnttab_close(xendev->gnttabdev);
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 4d966a2dbb..e21b83796e 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -8,6 +8,7 @@
 #ifndef HW_XEN_BUS_H
 #define HW_XEN_BUS_H
 
+#include "hw/xen/xen_backend_ops.h"
 #include "hw/xen/xen_common.h"
 #include "hw/sysbus.h"
 #include "qemu/notify.h"
diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h
index be281e1f38..0ef9b772ac 100644
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -2,6 +2,7 @@
 #define HW_XEN_LEGACY_BACKEND_H
 
 #include "hw/xen/xen_common.h"
+#include "hw/xen/xen_backend_ops.h"
 #include "hw/xen/xen_pvdev.h"
 #include "net/net.h"
 #include "qom/object.h"
diff --git a/include/hw/xen/xen_backend_ops.h b/include/hw/xen/xen_backend_ops.h
new file mode 100644
index 0000000000..9605456e81
--- /dev/null
+++ b/include/hw/xen/xen_backend_ops.h
@@ -0,0 +1,118 @@
+/*
+ * QEMU Xen backend support
+ *
+ * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * Authors: David Woodhouse <dwmw2@infradead.org>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#ifndef QEMU_XEN_BACKEND_OPS_H
+#define QEMU_XEN_BACKEND_OPS_H
+
+/*
+ * For the time being, these operations map fairly closely to the API of
+ * the actual Xen libraries, e.g. libxenevtchn. As we complete the migration
+ * from XenLegacyDevice back ends to the new XenDevice model, they may
+ * evolve to slightly higher-level APIs.
+ *
+ * The internal emulations do not emulate the Xen APIs entirely faithfully;
+ * only enough to be used by the Xen backend devices. For example, only one
+ * event channel can be bound to each handle, since that's sufficient for
+ * the device support (only the true Xen HVM backend uses more). And the
+ * behaviour of unmask() and pending() is different too because the device
+ * backends don't care.
+ */
+
+typedef struct xenevtchn_handle xenevtchn_handle;
+typedef int xenevtchn_port_or_error_t;
+typedef uint32_t evtchn_port_t;
+
+struct evtchn_backend_ops {
+    xenevtchn_handle *(*open)(void);
+    int (*bind_interdomain)(xenevtchn_handle *xc, uint32_t domid,
+                            evtchn_port_t guest_port);
+    int (*unbind)(xenevtchn_handle *xc, evtchn_port_t port);
+    int (*close)(struct xenevtchn_handle *xc);
+    int (*get_fd)(struct xenevtchn_handle *xc);
+    int (*notify)(struct xenevtchn_handle *xc, evtchn_port_t port);
+    int (*unmask)(struct xenevtchn_handle *xc, evtchn_port_t port);
+    int (*pending)(struct xenevtchn_handle *xc);
+};
+
+extern struct evtchn_backend_ops *xen_evtchn_ops;
+
+static inline xenevtchn_handle *qemu_xen_evtchn_open(void)
+{
+    if (!xen_evtchn_ops) {
+        return NULL;
+    }
+    return xen_evtchn_ops->open();
+}
+
+static inline int qemu_xen_evtchn_bind_interdomain(xenevtchn_handle *xc,
+                                                   uint32_t domid,
+                                                   evtchn_port_t guest_port)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->bind_interdomain(xc, domid, guest_port);
+}
+
+static inline int qemu_xen_evtchn_unbind(xenevtchn_handle *xc,
+                                         evtchn_port_t port)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->unbind(xc, port);
+}
+
+static inline int qemu_xen_evtchn_close(xenevtchn_handle *xc)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->close(xc);
+}
+
+static inline int qemu_xen_evtchn_fd(xenevtchn_handle *xc)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->get_fd(xc);
+}
+
+static inline int qemu_xen_evtchn_notify(xenevtchn_handle *xc,
+                                         evtchn_port_t port)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->notify(xc, port);
+}
+
+static inline int qemu_xen_evtchn_unmask(xenevtchn_handle *xc,
+                                         evtchn_port_t port)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->unmask(xc, port);
+}
+
+static inline int qemu_xen_evtchn_pending(xenevtchn_handle *xc)
+{
+    if (!xen_evtchn_ops) {
+        return -ENOSYS;
+    }
+    return xen_evtchn_ops->pending(xc);
+}
+
+void setup_xen_backend_ops(void);
+
+#endif /* QEMU_XEN_BACKEND_OPS_H */
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 9a13a756ae..34abd0a772 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -28,18 +28,7 @@ extern xc_interface *xen_xc;
 #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
 
 typedef xc_interface xenforeignmemory_handle;
-typedef xc_evtchn xenevtchn_handle;
 typedef xc_gnttab xengnttab_handle;
-typedef evtchn_port_or_error_t xenevtchn_port_or_error_t;
-
-#define xenevtchn_open(l, f) xc_evtchn_open(l, f);
-#define xenevtchn_close(h) xc_evtchn_close(h)
-#define xenevtchn_fd(h) xc_evtchn_fd(h)
-#define xenevtchn_pending(h) xc_evtchn_pending(h)
-#define xenevtchn_notify(h, p) xc_evtchn_notify(h, p)
-#define xenevtchn_bind_interdomain(h, d, p) xc_evtchn_bind_interdomain(h, d, p)
-#define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p)
-#define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p)
 
 #define xengnttab_open(l, f) xc_gnttab_open(l, f)
 #define xengnttab_close(h) xc_gnttab_close(h)
@@ -69,7 +58,6 @@ static inline void *xenforeignmemory_map(xc_interface *h, uint32_t dom,
 
 #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
 
-#include <xenevtchn.h>
 #include <xengnttab.h>
 #include <xenforeignmemory.h>
 
diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h
index 7cd4bc2b82..9c27b14764 100644
--- a/include/hw/xen/xen_pvdev.h
+++ b/include/hw/xen/xen_pvdev.h
@@ -1,6 +1,7 @@
 #ifndef QEMU_HW_XEN_PVDEV_H
 #define QEMU_HW_XEN_PVDEV_H
 
+#include "hw/xen/xen_backend_ops.h"
 #include "hw/xen/xen_common.h"
 /* ------------------------------------------------------------- */
 
diff --git a/softmmu/globals.c b/softmmu/globals.c
index 0a4405614e..eb62739be1 100644
--- a/softmmu/globals.c
+++ b/softmmu/globals.c
@@ -65,3 +65,4 @@ bool qemu_uuid_set;
 uint32_t xen_domid;
 enum xen_mode xen_mode = XEN_DISABLED;
 bool xen_domid_restrict;
+struct evtchn_backend_ops *xen_evtchn_ops;
-- 
2.39.0


[RFC PATCH v7bis 02/19] hw/xen: Add emulated evtchn ops
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/xen_evtchn.c | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index b23bb71abb..b571517b2c 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -30,6 +30,7 @@
 #include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
 #include "hw/irq.h"
+#include "hw/xen/xen_backend_ops.h"
 
 #include "xen_evtchn.h"
 #include "xen_overlay.h"
@@ -198,6 +199,17 @@ static const TypeInfo xen_evtchn_info = {
     .class_init    = xen_evtchn_class_init,
 };
 
+static struct evtchn_backend_ops emu_evtchn_backend_ops = {
+    .open = xen_be_evtchn_open,
+    .bind_interdomain = xen_be_evtchn_bind_interdomain,
+    .unbind = xen_be_evtchn_unbind,
+    .close = xen_be_evtchn_close,
+    .get_fd = xen_be_evtchn_fd,
+    .notify = xen_be_evtchn_notify,
+    .unmask = xen_be_evtchn_unmask,
+    .pending = xen_be_evtchn_pending,
+};
+
 static void gsi_assert_bh(void *opaque)
 {
     struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
@@ -220,6 +232,9 @@ void xen_evtchn_create(void)
     for (i = 0; i < GSI_NUM_PINS; i++) {
         sysbus_init_irq(SYS_BUS_DEVICE(s), &s->gsis[i]);
     }
+
+    /* Set event channel functions for backend drivers to use */
+    xen_evtchn_ops = &emu_evtchn_backend_ops;
 }
 
 void xen_evtchn_connect_gsis(qemu_irq *system_gsis)
-- 
2.39.0
[RFC PATCH v7bis 03/19] hw/xen: Add gnttab operations to allow redirection to internal emulation
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

In emulation, mapping more than one grant ref to be virtually contiguous
would be fairly difficult. The best way to do it might be to make the
ram_block mappings actually backed by a file (shmem or a deleted file,
perhaps) so that we can have multiple *shared* mappings of it. But that
would be fairly intrusive.

Making the backend drivers cope with page *lists* instead of expecting
the mapping to be contiguous is also non-trivial, since some structures
would actually *cross* page boundaries (e.g. the 32-bit blkif responses
which are 12 bytes).

So for now, we'll support only single-page mappings in emulation.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
 hw/xen/xen-bus.c                    | 112 ++------------------
 hw/xen/xen-legacy-backend.c         | 125 ++--------------------
 hw/xen/xen-operations.c             | 157 ++++++++++++++++++++++++++++
 hw/xen/xen_pvdev.c                  |   2 +-
 include/hw/xen/xen-bus.h            |   3 +-
 include/hw/xen/xen-legacy-backend.h |  13 +--
 include/hw/xen/xen_backend_ops.h    |  93 ++++++++++++++++
 include/hw/xen/xen_common.h         |  39 -------
 softmmu/globals.c                   |   1 +
 9 files changed, 273 insertions(+), 272 deletions(-)

diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index d0b1ae93da..b247e86f28 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -947,7 +947,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
 void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
                                    Error **errp)
 {
-    if (xengnttab_set_max_grants(xendev->xgth, nr_refs)) {
+    if (qemu_xen_gnttab_set_max_grants(xendev->xgth, nr_refs)) {
         error_setg_errno(errp, errno, "xengnttab_set_max_grants failed");
     }
 }
@@ -956,9 +956,8 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
                                 unsigned int nr_refs, int prot,
                                 Error **errp)
 {
-    void *map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_refs,
-                                                xendev->frontend_id, refs,
-                                                prot);
+    void *map = qemu_xen_gnttab_map_refs(xendev->xgth, nr_refs,
+                                         xendev->frontend_id, refs, prot);
 
     if (!map) {
         error_setg_errno(errp, errno,
@@ -971,109 +970,17 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
 void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
                                  unsigned int nr_refs, Error **errp)
 {
-    if (xengnttab_unmap(xendev->xgth, map, nr_refs)) {
+    if (qemu_xen_gnttab_unmap(xendev->xgth, map, nr_refs)) {
         error_setg_errno(errp, errno, "xengnttab_unmap failed");
     }
 }
 
-static void compat_copy_grant_refs(XenDevice *xendev, bool to_domain,
-                                   XenDeviceGrantCopySegment segs[],
-                                   unsigned int nr_segs, Error **errp)
-{
-    uint32_t *refs = g_new(uint32_t, nr_segs);
-    int prot = to_domain ? PROT_WRITE : PROT_READ;
-    void *map;
-    unsigned int i;
-
-    for (i = 0; i < nr_segs; i++) {
-        XenDeviceGrantCopySegment *seg = &segs[i];
-
-        refs[i] = to_domain ? seg->dest.foreign.ref :
-            seg->source.foreign.ref;
-    }
-
-    map = xengnttab_map_domain_grant_refs(xendev->xgth, nr_segs,
-                                          xendev->frontend_id, refs,
-                                          prot);
-    if (!map) {
-        error_setg_errno(errp, errno,
-                         "xengnttab_map_domain_grant_refs failed");
-        goto done;
-    }
-
-    for (i = 0; i < nr_segs; i++) {
-        XenDeviceGrantCopySegment *seg = &segs[i];
-        void *page = map + (i * XC_PAGE_SIZE);
-
-        if (to_domain) {
-            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
-                   seg->len);
-        } else {
-            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
-                   seg->len);
-        }
-    }
-
-    if (xengnttab_unmap(xendev->xgth, map, nr_segs)) {
-        error_setg_errno(errp, errno, "xengnttab_unmap failed");
-    }
-
-done:
-    g_free(refs);
-}
-
 void xen_device_copy_grant_refs(XenDevice *xendev, bool to_domain,
                                 XenDeviceGrantCopySegment segs[],
                                 unsigned int nr_segs, Error **errp)
 {
-    xengnttab_grant_copy_segment_t *xengnttab_segs;
-    unsigned int i;
-
-    if (!xendev->feature_grant_copy) {
-        compat_copy_grant_refs(xendev, to_domain, segs, nr_segs, errp);
-        return;
-    }
-
-    xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
-
-    for (i = 0; i < nr_segs; i++) {
-        XenDeviceGrantCopySegment *seg = &segs[i];
-        xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
-        if (to_domain) {
-            xengnttab_seg->flags = GNTCOPY_dest_gref;
-            xengnttab_seg->dest.foreign.domid = xendev->frontend_id;
-            xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
-            xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
-            xengnttab_seg->source.virt = seg->source.virt;
-        } else {
-            xengnttab_seg->flags = GNTCOPY_source_gref;
-            xengnttab_seg->source.foreign.domid = xendev->frontend_id;
-            xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
-            xengnttab_seg->source.foreign.offset =
-                seg->source.foreign.offset;
-            xengnttab_seg->dest.virt = seg->dest.virt;
-        }
-
-        xengnttab_seg->len = seg->len;
-    }
-
-    if (xengnttab_grant_copy(xendev->xgth, nr_segs, xengnttab_segs)) {
-        error_setg_errno(errp, errno, "xengnttab_grant_copy failed");
-        goto done;
-    }
-
-    for (i = 0; i < nr_segs; i++) {
-        xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
-        if (xengnttab_seg->status != GNTST_okay) {
-            error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i);
-            break;
-        }
-    }
-
-done:
-    g_free(xengnttab_segs);
+    qemu_xen_gnttab_grant_copy(xendev->xgth, to_domain, xendev->frontend_id,
+                               (XenGrantCopySegment *)segs, nr_segs, errp);
 }
 
 struct XenEventChannel {
@@ -1235,7 +1142,7 @@ static void xen_device_unrealize(DeviceState *dev)
     xen_device_backend_destroy(xendev);
 
     if (xendev->xgth) {
-        xengnttab_close(xendev->xgth);
+        qemu_xen_gnttab_close(xendev->xgth);
         xendev->xgth = NULL;
     }
 
@@ -1298,15 +1205,12 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
 
     xendev->watch_list = watch_list_create(xendev->xsh);
 
-    xendev->xgth = xengnttab_open(NULL, 0);
+    xendev->xgth = qemu_xen_gnttab_open();
     if (!xendev->xgth) {
         error_setg_errno(errp, errno, "failed xengnttab_open");
         goto unrealize;
     }
 
-    xendev->feature_grant_copy =
-        (xengnttab_grant_copy(xendev->xgth, 0, NULL) == 0);
-
     xen_device_backend_create(xendev, errp);
     if (*errp) {
         goto unrealize;
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 23e8a6fbd8..88d2ae3f0a 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -43,7 +43,6 @@ struct xs_handle *xenstore;
 const char *xen_protocol;
 
 /* private */
-static bool xen_feature_grant_copy;
 static int debug;
 
 int xenstore_write_be_str(struct XenLegacyDevice *xendev, const char *node,
@@ -113,7 +112,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev,
 {
     assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
 
-    if (xengnttab_set_max_grants(xendev->gnttabdev, nr_refs)) {
+    if (qemu_xen_gnttab_set_max_grants(xendev->gnttabdev, nr_refs)) {
         xen_pv_printf(xendev, 0, "xengnttab_set_max_grants failed: %s\n",
                       strerror(errno));
     }
@@ -126,8 +125,8 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
 
     assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
 
-    ptr = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_refs,
-                                          xen_domid, refs, prot);
+    ptr = qemu_xen_gnttab_map_refs(xendev->gnttabdev, nr_refs, xen_domid, refs,
+                                   prot);
     if (!ptr) {
         xen_pv_printf(xendev, 0,
                       "xengnttab_map_domain_grant_refs failed: %s\n",
@@ -142,119 +141,27 @@ void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr,
 {
     assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
 
-    if (xengnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) {
+    if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) {
         xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n",
                       strerror(errno));
     }
 }
 
-static int compat_copy_grant_refs(struct XenLegacyDevice *xendev,
-                                  bool to_domain,
-                                  XenGrantCopySegment segs[],
-                                  unsigned int nr_segs)
-{
-    uint32_t *refs = g_new(uint32_t, nr_segs);
-    int prot = to_domain ? PROT_WRITE : PROT_READ;
-    void *pages;
-    unsigned int i;
-
-    for (i = 0; i < nr_segs; i++) {
-        XenGrantCopySegment *seg = &segs[i];
-
-        refs[i] = to_domain ?
-            seg->dest.foreign.ref : seg->source.foreign.ref;
-    }
-
-    pages = xengnttab_map_domain_grant_refs(xendev->gnttabdev, nr_segs,
-                                            xen_domid, refs, prot);
-    if (!pages) {
-        xen_pv_printf(xendev, 0,
-                      "xengnttab_map_domain_grant_refs failed: %s\n",
-                      strerror(errno));
-        g_free(refs);
-        return -1;
-    }
-
-    for (i = 0; i < nr_segs; i++) {
-        XenGrantCopySegment *seg = &segs[i];
-        void *page = pages + (i * XC_PAGE_SIZE);
-
-        if (to_domain) {
-            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
-                   seg->len);
-        } else {
-            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
-                   seg->len);
-        }
-    }
-
-    if (xengnttab_unmap(xendev->gnttabdev, pages, nr_segs)) {
-        xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n",
-                      strerror(errno));
-    }
-
-    g_free(refs);
-    return 0;
-}
-
 int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev,
                            bool to_domain,
                            XenGrantCopySegment segs[],
                            unsigned int nr_segs)
 {
-    xengnttab_grant_copy_segment_t *xengnttab_segs;
-    unsigned int i;
     int rc;
 
     assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
 
-    if (!xen_feature_grant_copy) {
-        return compat_copy_grant_refs(xendev, to_domain, segs, nr_segs);
-    }
-
-    xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
-
-    for (i = 0; i < nr_segs; i++) {
-        XenGrantCopySegment *seg = &segs[i];
-        xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
-
-        if (to_domain) {
-            xengnttab_seg->flags = GNTCOPY_dest_gref;
-            xengnttab_seg->dest.foreign.domid = xen_domid;
-            xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
-            xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
-            xengnttab_seg->source.virt = seg->source.virt;
-        } else {
-            xengnttab_seg->flags = GNTCOPY_source_gref;
-            xengnttab_seg->source.foreign.domid = xen_domid;
-            xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
-            xengnttab_seg->source.foreign.offset =
-                seg->source.foreign.offset;
-            xengnttab_seg->dest.virt = seg->dest.virt;
-        }
-
-        xengnttab_seg->len = seg->len;
-    }
-
-    rc = xengnttab_grant_copy(xendev->gnttabdev, nr_segs, xengnttab_segs);
-
+    rc = qemu_xen_gnttab_grant_copy(xendev->gnttabdev, to_domain, xen_domid,
+                                    segs, nr_segs, NULL);
     if (rc) {
-        xen_pv_printf(xendev, 0, "xengnttab_copy failed: %s\n",
-                      strerror(errno));
-    }
-
-    for (i = 0; i < nr_segs; i++) {
-        xengnttab_grant_copy_segment_t *xengnttab_seg =
-            &xengnttab_segs[i];
-
-        if (xengnttab_seg->status != GNTST_okay) {
-            xen_pv_printf(xendev, 0, "segment[%u] status: %d\n", i,
-                          xengnttab_seg->status);
-            rc = -1;
-        }
+        xen_pv_printf(xendev, 0, "xengnttab_grant_copy failed: %s\n",
+                      strerror(-rc));
     }
-
-    g_free(xengnttab_segs);
     return rc;
 }
 
@@ -466,7 +373,7 @@ static int xen_be_try_initialise(struct XenLegacyDevice *xendev)
     }
 
     if (xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV) {
-        xendev->gnttabdev = xengnttab_open(NULL, 0);
+        xendev->gnttabdev = qemu_xen_gnttab_open();
         if (xendev->gnttabdev == NULL) {
             xen_pv_printf(NULL, 0, "can't open gnttab device\n");
             return -1;
@@ -524,7 +431,7 @@ static void xen_be_disconnect(struct XenLegacyDevice *xendev,
         xendev->ops->disconnect(xendev);
     }
     if (xendev->gnttabdev) {
-        xengnttab_close(xendev->gnttabdev);
+        qemu_xen_gnttab_close(xendev->gnttabdev);
         xendev->gnttabdev = NULL;
     }
     if (xendev->be_state != state) {
@@ -678,8 +585,6 @@ void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev)
 
 int xen_be_init(void)
 {
-    xengnttab_handle *gnttabdev;
-
     xenstore = xs_daemon_open();
     if (!xenstore) {
         xen_pv_printf(NULL, 0, "can't connect to xenstored\n");
@@ -688,19 +593,11 @@ int xen_be_init(void)
 
     qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL);
 
-    if (xen_xc == NULL || xen_fmem == NULL) {
+    if (xen_evtchn_ops == NULL || xen_gnttab_ops == NULL) {
         /* Check if xen_init() have been called */
         goto err;
     }
 
-    gnttabdev = xengnttab_open(NULL, 0);
-    if (gnttabdev != NULL) {
-        if (xengnttab_grant_copy(gnttabdev, 0, NULL) == 0) {
-            xen_feature_grant_copy = true;
-        }
-        xengnttab_close(gnttabdev);
-    }
-
     xen_sysdev = qdev_new(TYPE_XENSYSDEV);
     sysbus_realize_and_unref(SYS_BUS_DEVICE(xen_sysdev), &error_fatal);
     xen_sysbus = qbus_new(TYPE_XENSYSBUS, xen_sysdev, "xen-sysbus");
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
index 1a959d89e8..eb5fb0a59f 100644
--- a/hw/xen/xen-operations.c
+++ b/hw/xen/xen-operations.c
@@ -21,6 +21,7 @@
  * must be undefined before including xenctrl.h
  */
 #undef XC_WANT_COMPAT_EVTCHN_API
+#undef XC_WANT_COMPAT_GNTTAB_API
 
 #include <xenctrl.h>
 
@@ -43,12 +44,141 @@ typedef evtchn_port_or_error_t xenevtchn_port_or_error_t;
 #define xenevtchn_unmask(h, p) xc_evtchn_unmask(h, p)
 #define xenevtchn_unbind(h, p) xc_evtchn_unbind(h, p)
 
+typedef xc_gnttab xengnttab_handle;
+
+#define xengnttab_open(l, f) xc_gnttab_open(l, f)
+#define xengnttab_close(h) xc_gnttab_close(h)
+#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n)
+#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p)
+#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n)
+#define xengnttab_map_grant_refs(h, c, d, r, p) \
+    xc_gnttab_map_grant_refs(h, c, d, r, p)
+#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
+    xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
+
 #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
 
 #include <xenevtchn.h>
+#include <xengnttab.h>
 
 #endif
 
+/* Xen before 4.8 */
+
+static int libxengnttab_fallback_grant_copy(xengnttab_handle *xgt,
+                                            bool to_domain, uint32_t domid,
+                                            XenGrantCopySegment segs[],
+                                            unsigned int nr_segs, Error **errp)
+{
+    uint32_t *refs = g_new(uint32_t, nr_segs);
+    int prot = to_domain ? PROT_WRITE : PROT_READ;
+    void *map;
+    unsigned int i;
+    int rc = 0;
+
+    for (i = 0; i < nr_segs; i++) {
+        XenGrantCopySegment *seg = &segs[i];
+
+        refs[i] = to_domain ? seg->dest.foreign.ref :
+            seg->source.foreign.ref;
+    }
+    map = xengnttab_map_domain_grant_refs(xgt, nr_segs, domid, refs, prot);
+    if (!map) {
+        if (errp) {
+            error_setg_errno(errp, errno,
+                             "xengnttab_map_domain_grant_refs failed");
+        }
+        rc = -errno;
+        goto done;
+    }
+
+    for (i = 0; i < nr_segs; i++) {
+        XenGrantCopySegment *seg = &segs[i];
+        void *page = map + (i * XEN_PAGE_SIZE);
+
+        if (to_domain) {
+            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
+                   seg->len);
+        } else {
+            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
+                   seg->len);
+        }
+    }
+
+    if (xengnttab_unmap(xgt, map, nr_segs)) {
+        if (errp) {
+            error_setg_errno(errp, errno, "xengnttab_unmap failed");
+        }
+        rc = -errno;
+    }
+
+done:
+    g_free(refs);
+    return rc;
+}
+
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800
+
+static int libxengnttab_backend_grant_copy(xengnttab_handle *xgt,
+                                           bool to_domain, uint32_t domid,
+                                           XenGrantCopySegment *segs,
+                                           uint32_t nr_segs, Error **errp)
+{
+    xengnttab_grant_copy_segment_t *xengnttab_segs;
+    unsigned int i;
+    int rc;
+
+    xengnttab_segs = g_new0(xengnttab_grant_copy_segment_t, nr_segs);
+
+    for (i = 0; i < nr_segs; i++) {
+        XenGrantCopySegment *seg = &segs[i];
+        xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
+
+        if (to_domain) {
+            xengnttab_seg->flags = GNTCOPY_dest_gref;
+            xengnttab_seg->dest.foreign.domid = domid;
+            xengnttab_seg->dest.foreign.ref = seg->dest.foreign.ref;
+            xengnttab_seg->dest.foreign.offset = seg->dest.foreign.offset;
+            xengnttab_seg->source.virt = seg->source.virt;
+        } else {
+            xengnttab_seg->flags = GNTCOPY_source_gref;
+            xengnttab_seg->source.foreign.domid = domid;
+            xengnttab_seg->source.foreign.ref = seg->source.foreign.ref;
+            xengnttab_seg->source.foreign.offset =
+                seg->source.foreign.offset;
+            xengnttab_seg->dest.virt = seg->dest.virt;
+        }
+
+        xengnttab_seg->len = seg->len;
+    }
+
+    if (xengnttab_grant_copy(xgt, nr_segs, xengnttab_segs)) {
+        if (errp) {
+            error_setg_errno(errp, errno, "xengnttab_grant_copy failed");
+        }
+        rc = -errno;
+        goto done;
+    }
+
+    rc = 0;
+    for (i = 0; i < nr_segs; i++) {
+        xengnttab_grant_copy_segment_t *xengnttab_seg = &xengnttab_segs[i];
+
+        if (xengnttab_seg->status != GNTST_okay) {
+            if (errp) {
+                error_setg(errp, "xengnttab_grant_copy seg[%u] failed", i);
+            }
+            rc = -EIO;
+            break;
+        }
+    }
+
+done:
+    g_free(xengnttab_segs);
+    return rc;
+}
+#endif
+
 static xenevtchn_handle *libxenevtchn_backend_open(void)
 {
     return xenevtchn_open(NULL, 0);
@@ -65,7 +195,34 @@ struct evtchn_backend_ops libxenevtchn_backend_ops = {
     .pending = xenevtchn_pending,
 };
 
+static xengnttab_handle *libxengnttab_backend_open(void)
+{
+    return xengnttab_open(NULL, 0);
+}
+
+
+static struct gnttab_backend_ops libxengnttab_backend_ops = {
+    .features = XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE,
+    .open = libxengnttab_backend_open,
+    .close = xengnttab_close,
+    .grant_copy = libxengnttab_fallback_grant_copy,
+    .set_max_grants = xengnttab_set_max_grants,
+    .map_refs = xengnttab_map_domain_grant_refs,
+    .unmap = xengnttab_unmap,
+};
+
 void setup_xen_backend_ops(void)
 {
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800
+    xengnttab_handle *xgt = xengnttab_open(NULL, 0);
+
+    if (xgt) {
+        if (xengnttab_grant_copy(xgt, 0, NULL) == 0) {
+            xen_gnttab_ops->grant_copy = libxengnttab_backend_grant_copy;
+        }
+        xengnttab_close(xgt);
+    }
+#endif
     xen_evtchn_ops = &libxenevtchn_backend_ops;
+    xen_gnttab_ops = &libxengnttab_backend_ops;
 }
diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c
index 86a2c8e567..d8582cc74c 100644
--- a/hw/xen/xen_pvdev.c
+++ b/hw/xen/xen_pvdev.c
@@ -309,7 +309,7 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev)
         qemu_xen_evtchn_close(xendev->evtchndev);
     }
     if (xendev->gnttabdev != NULL) {
-        xengnttab_close(xendev->gnttabdev);
+        qemu_xen_gnttab_close(xendev->gnttabdev);
     }
 
     QTAILQ_REMOVE(&xendevs, xendev, next);
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index e21b83796e..72d71d1eb7 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -9,7 +9,7 @@
 #define HW_XEN_BUS_H
 
 #include "hw/xen/xen_backend_ops.h"
-#include "hw/xen/xen_common.h"
+#include "hw/xen/interface/io/xenbus.h"
 #include "hw/sysbus.h"
 #include "qemu/notify.h"
 #include "qom/object.h"
@@ -33,7 +33,6 @@ struct XenDevice {
     bool backend_online;
     XenWatch *backend_online_watch;
     xengnttab_handle *xgth;
-    bool feature_grant_copy;
     bool inactive;
     QLIST_HEAD(, XenEventChannel) event_channels;
     QLIST_ENTRY(XenDevice) list;
diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h
index 0ef9b772ac..01c12952b7 100644
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -1,8 +1,8 @@
 #ifndef HW_XEN_LEGACY_BACKEND_H
 #define HW_XEN_LEGACY_BACKEND_H
 
-#include "hw/xen/xen_common.h"
 #include "hw/xen/xen_backend_ops.h"
+#include "hw/xen/interface/io/xenbus.h"
 #include "hw/xen/xen_pvdev.h"
 #include "net/net.h"
 #include "qom/object.h"
@@ -55,17 +55,6 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
 void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr,
                              unsigned int nr_refs);
 
-typedef struct XenGrantCopySegment {
-    union {
-        void *virt;
-        struct {
-            uint32_t ref;
-            off_t offset;
-        } foreign;
-    } source, dest;
-    size_t len;
-} XenGrantCopySegment;
-
 int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev,
                            bool to_domain, XenGrantCopySegment segs[],
                            unsigned int nr_segs);
diff --git a/include/hw/xen/xen_backend_ops.h b/include/hw/xen/xen_backend_ops.h
index 9605456e81..acb7838e63 100644
--- a/include/hw/xen/xen_backend_ops.h
+++ b/include/hw/xen/xen_backend_ops.h
@@ -29,6 +29,12 @@
 typedef struct xenevtchn_handle xenevtchn_handle;
 typedef int xenevtchn_port_or_error_t;
 typedef uint32_t evtchn_port_t;
+typedef uint16_t domid_t;
+typedef uint32_t grant_ref_t;
+
+#define XEN_PAGE_SHIFT       12
+#define XEN_PAGE_SIZE        (1UL << XEN_PAGE_SHIFT)
+#define XEN_PAGE_MASK        (~(XEN_PAGE_SIZE-1))
 
 struct evtchn_backend_ops {
     xenevtchn_handle *(*open)(void);
@@ -113,6 +119,93 @@ static inline int qemu_xen_evtchn_pending(xenevtchn_handle *xc)
     return xen_evtchn_ops->pending(xc);
 }
 
+typedef struct xengntdev_handle xengnttab_handle;
+
+typedef struct XenGrantCopySegment {
+    union {
+        void *virt;
+        struct {
+            uint32_t ref;
+            off_t offset;
+        } foreign;
+    } source, dest;
+    size_t len;
+} XenGrantCopySegment;
+
+#define XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE  (1U << 0)
+
+struct gnttab_backend_ops {
+    uint32_t features;
+    xengnttab_handle *(*open)(void);
+    int (*close)(xengnttab_handle *xgt);
+    int (*grant_copy)(xengnttab_handle *xgt, bool to_domain, uint32_t domid,
+                      XenGrantCopySegment *segs, uint32_t nr_segs,
+                      Error **errp);
+    int (*set_max_grants)(xengnttab_handle *xgt, uint32_t nr_grants);
+    void *(*map_refs)(xengnttab_handle *xgt, uint32_t count, uint32_t domid,
+                      uint32_t *refs, int prot);
+    int (*unmap)(xengnttab_handle *xgt, void *start_address, uint32_t count);
+};
+
+extern struct gnttab_backend_ops *xen_gnttab_ops;
+
+static inline xengnttab_handle *qemu_xen_gnttab_open(void)
+{
+    if (!xen_gnttab_ops) {
+        return NULL;
+    }
+    return xen_gnttab_ops->open();
+}
+
+static inline int qemu_xen_gnttab_close(xengnttab_handle *xgt)
+{
+    if (!xen_gnttab_ops) {
+        return -ENOSYS;
+    }
+    return xen_gnttab_ops->close(xgt);
+}
+
+static inline int qemu_xen_gnttab_grant_copy(xengnttab_handle *xgt,
+                                             bool to_domain, uint32_t domid,
+                                             XenGrantCopySegment *segs,
+                                             uint32_t nr_segs, Error **errp)
+{
+    if (!xen_gnttab_ops) {
+        return -ENOSYS;
+    }
+
+    return xen_gnttab_ops->grant_copy(xgt, to_domain, domid, segs, nr_segs,
+                                      errp);
+}
+
+static inline int qemu_xen_gnttab_set_max_grants(xengnttab_handle *xgt, uint32_t nr_grants)
+{
+    if (!xen_gnttab_ops) {
+        return -ENOSYS;
+    }
+    return xen_gnttab_ops->set_max_grants(xgt, nr_grants);
+}
+
+static inline void *qemu_xen_gnttab_map_refs(xengnttab_handle *xgt, uint32_t count,
+                                             uint32_t domid, uint32_t *refs,
+                                             int prot)
+{
+    if (!xen_gnttab_ops) {
+        return NULL;
+    }
+    return xen_gnttab_ops->map_refs(xgt, count, domid, refs, prot);
+}
+
+static inline int qemu_xen_gnttab_unmap(xengnttab_handle *xgt,
+                                        void *start_address,
+                                        uint32_t count)
+{
+    if (!xen_gnttab_ops) {
+        return -ENOSYS;
+    }
+    return xen_gnttab_ops->unmap(xgt, start_address, count);
+}
+
 void setup_xen_backend_ops(void);
 
 #endif /* QEMU_XEN_BACKEND_OPS_H */
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 34abd0a772..d4d10d3ff1 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -28,17 +28,6 @@ extern xc_interface *xen_xc;
 #if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
 
 typedef xc_interface xenforeignmemory_handle;
-typedef xc_gnttab xengnttab_handle;
-
-#define xengnttab_open(l, f) xc_gnttab_open(l, f)
-#define xengnttab_close(h) xc_gnttab_close(h)
-#define xengnttab_set_max_grants(h, n) xc_gnttab_set_max_grants(h, n)
-#define xengnttab_map_grant_ref(h, d, r, p) xc_gnttab_map_grant_ref(h, d, r, p)
-#define xengnttab_unmap(h, a, n) xc_gnttab_munmap(h, a, n)
-#define xengnttab_map_grant_refs(h, c, d, r, p) \
-    xc_gnttab_map_grant_refs(h, c, d, r, p)
-#define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
-    xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
 
 #define xenforeignmemory_open(l, f) xen_xc
 #define xenforeignmemory_close(h)
@@ -58,7 +47,6 @@ static inline void *xenforeignmemory_map(xc_interface *h, uint32_t dom,
 
 #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
 
-#include <xengnttab.h>
 #include <xenforeignmemory.h>
 
 #endif
@@ -648,31 +636,4 @@ static inline int xen_set_ioreq_server_state(domid_t dom,
 
 #endif
 
-/* Xen before 4.8 */
-
-#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40800
-
-struct xengnttab_grant_copy_segment {
-    union xengnttab_copy_ptr {
-        void *virt;
-        struct {
-            uint32_t ref;
-            uint16_t offset;
-            uint16_t domid;
-        } foreign;
-    } source, dest;
-    uint16_t len;
-    uint16_t flags;
-    int16_t status;
-};
-
-typedef struct xengnttab_grant_copy_segment xengnttab_grant_copy_segment_t;
-
-static inline int xengnttab_grant_copy(xengnttab_handle *xgt, uint32_t count,
-                                       xengnttab_grant_copy_segment_t *segs)
-{
-    return -ENOSYS;
-}
-#endif
-
 #endif /* QEMU_HW_XEN_COMMON_H */
diff --git a/softmmu/globals.c b/softmmu/globals.c
index eb62739be1..23bb27f0f6 100644
--- a/softmmu/globals.c
+++ b/softmmu/globals.c
@@ -66,3 +66,4 @@ uint32_t xen_domid;
 enum xen_mode xen_mode = XEN_DISABLED;
 bool xen_domid_restrict;
 struct evtchn_backend_ops *xen_evtchn_ops;
+struct gnttab_backend_ops *xen_gnttab_ops;
-- 
2.39.0
[RFC PATCH v7bis 04/19] hw/xen: Pass grant ref to gnttab unmap
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

Under real Xen we only need to munmap() but in the emulation code will want
to know the original ref to keep the the tracking straight.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/9pfs/xen-9p-backend.c            |  7 ++++---
 hw/block/dataplane/xen-block.c      |  1 +
 hw/char/xen_console.c               |  2 +-
 hw/net/xen_nic.c                    | 13 ++++++++-----
 hw/usb/xen-usb.c                    | 21 ++++++++++++++++-----
 hw/xen/xen-bus.c                    |  4 ++--
 hw/xen/xen-legacy-backend.c         |  4 ++--
 hw/xen/xen-operations.c             |  9 ++++++++-
 include/hw/xen/xen-bus.h            |  2 +-
 include/hw/xen/xen-legacy-backend.h |  6 +++---
 include/hw/xen/xen_backend_ops.h    |  7 ++++---
 11 files changed, 50 insertions(+), 26 deletions(-)

diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index 864bdaf952..d8bb0e847c 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -359,12 +359,13 @@ static int xen_9pfs_free(struct XenLegacyDevice *xendev)
         if (xen_9pdev->rings[i].data != NULL) {
             xen_be_unmap_grant_refs(&xen_9pdev->xendev,
                                     xen_9pdev->rings[i].data,
+                                    xen_9pdev->rings[i].intf->ref,
                                     (1 << xen_9pdev->rings[i].ring_order));
         }
         if (xen_9pdev->rings[i].intf != NULL) {
-            xen_be_unmap_grant_refs(&xen_9pdev->xendev,
-                                    xen_9pdev->rings[i].intf,
-                                    1);
+            xen_be_unmap_grant_ref(&xen_9pdev->xendev,
+                                   xen_9pdev->rings[i].intf,
+                                   xen_9pdev->rings[i].ref);
         }
         if (xen_9pdev->rings[i].bh != NULL) {
             qemu_bh_delete(xen_9pdev->rings[i].bh);
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 2785b9e849..e55b713002 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -705,6 +705,7 @@ void xen_block_dataplane_stop(XenBlockDataPlane *dataplane)
         Error *local_err = NULL;
 
         xen_device_unmap_grant_refs(xendev, dataplane->sring,
+                                    dataplane->ring_ref,
                                     dataplane->nr_ring_ref, &local_err);
         dataplane->sring = NULL;
 
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 63153dfde4..19ad6c946a 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -271,7 +271,7 @@ static void con_disconnect(struct XenLegacyDevice *xendev)
         if (!xendev->dev) {
             xenforeignmemory_unmap(xen_fmem, con->sring, 1);
         } else {
-            xen_be_unmap_grant_ref(xendev, con->sring);
+            xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
         }
         con->sring = NULL;
     }
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 7d92c2d022..166d03787d 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -181,7 +181,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
                 qemu_send_packet(qemu_get_queue(netdev->nic),
                                  page + txreq.offset, txreq.size);
             }
-            xen_be_unmap_grant_ref(&netdev->xendev, page);
+            xen_be_unmap_grant_ref(&netdev->xendev, page, txreq.gref);
             net_tx_response(netdev, &txreq, NETIF_RSP_OKAY);
         }
         if (!netdev->tx_work) {
@@ -261,7 +261,7 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
         return -1;
     }
     memcpy(page + NET_IP_ALIGN, buf, size);
-    xen_be_unmap_grant_ref(&netdev->xendev, page);
+    xen_be_unmap_grant_ref(&netdev->xendev, page, rxreq.gref);
     net_rx_response(netdev, &rxreq, NETIF_RSP_OKAY, NET_IP_ALIGN, size, 0);
 
     return size;
@@ -343,7 +343,8 @@ static int net_connect(struct XenLegacyDevice *xendev)
                                        netdev->rx_ring_ref,
                                        PROT_READ | PROT_WRITE);
     if (!netdev->rxs) {
-        xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs);
+        xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
+                               netdev->tx_ring_ref);
         netdev->txs = NULL;
         return -1;
     }
@@ -368,11 +369,13 @@ static void net_disconnect(struct XenLegacyDevice *xendev)
     xen_pv_unbind_evtchn(&netdev->xendev);
 
     if (netdev->txs) {
-        xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs);
+        xen_be_unmap_grant_ref(&netdev->xendev, netdev->txs,
+                               netdev->tx_ring_ref);
         netdev->txs = NULL;
     }
     if (netdev->rxs) {
-        xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs);
+        xen_be_unmap_grant_ref(&netdev->xendev, netdev->rxs,
+                               netdev->rx_ring_ref);
         netdev->rxs = NULL;
     }
 }
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 0f7369e7ed..a770a64cb4 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -101,6 +101,8 @@ struct usbback_hotplug {
 struct usbback_info {
     struct XenLegacyDevice         xendev;  /* must be first */
     USBBus                   bus;
+    uint32_t                 urb_ring_ref;
+    uint32_t                 conn_ring_ref;
     void                     *urb_sring;
     void                     *conn_sring;
     struct usbif_urb_back_ring urb_ring;
@@ -277,10 +279,11 @@ static int usbback_init_packet(struct usbback_req *usbback_req)
 static void usbback_do_response(struct usbback_req *usbback_req, int32_t status,
                                 int32_t actual_length, int32_t error_count)
 {
+    uint32_t ref[USBIF_MAX_SEGMENTS_PER_REQUEST];
     struct usbback_info *usbif;
     struct usbif_urb_response *res;
     struct XenLegacyDevice *xendev;
-    unsigned int notify;
+    unsigned int notify, i;
 
     usbif = usbback_req->usbif;
     xendev = &usbif->xendev;
@@ -293,13 +296,19 @@ static void usbback_do_response(struct usbback_req *usbback_req, int32_t status,
     }
 
     if (usbback_req->buffer) {
-        xen_be_unmap_grant_refs(xendev, usbback_req->buffer,
+        for (i = 0; i < usbback_req->nr_buffer_segs; i++) {
+            ref[i] = usbback_req->req.seg[i].gref;
+        }
+        xen_be_unmap_grant_refs(xendev, usbback_req->buffer, ref,
                                 usbback_req->nr_buffer_segs);
         usbback_req->buffer = NULL;
     }
 
     if (usbback_req->isoc_buffer) {
-        xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer,
+        for (i = 0; i < usbback_req->nr_extra_segs; i++) {
+            ref[i] = usbback_req->req.seg[i + usbback_req->req.nr_buffer_segs].gref;
+        }
+        xen_be_unmap_grant_refs(xendev, usbback_req->isoc_buffer, ref,
                                 usbback_req->nr_extra_segs);
         usbback_req->isoc_buffer = NULL;
     }
@@ -832,11 +841,11 @@ static void usbback_disconnect(struct XenLegacyDevice *xendev)
     xen_pv_unbind_evtchn(xendev);
 
     if (usbif->urb_sring) {
-        xen_be_unmap_grant_ref(xendev, usbif->urb_sring);
+        xen_be_unmap_grant_ref(xendev, usbif->urb_sring, usbif->urb_ring_ref);
         usbif->urb_sring = NULL;
     }
     if (usbif->conn_sring) {
-        xen_be_unmap_grant_ref(xendev, usbif->conn_sring);
+        xen_be_unmap_grant_ref(xendev, usbif->conn_sring, usbif->conn_ring_ref);
         usbif->conn_sring = NULL;
     }
 
@@ -889,6 +898,8 @@ static int usbback_connect(struct XenLegacyDevice *xendev)
         return -1;
     }
 
+    usbif->urb_ring_ref = urb_ring_ref;
+    usbif->conn_ring_ref = conn_ring_ref;
     urb_sring = usbif->urb_sring;
     conn_sring = usbif->conn_sring;
     BACK_RING_INIT(&usbif->urb_ring, urb_sring, XC_PAGE_SIZE);
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index b247e86f28..aee6a8c9b0 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -967,10 +967,10 @@ void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
     return map;
 }
 
-void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
+void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs,
                                  unsigned int nr_refs, Error **errp)
 {
-    if (qemu_xen_gnttab_unmap(xendev->xgth, map, nr_refs)) {
+    if (qemu_xen_gnttab_unmap(xendev->xgth, map, refs, nr_refs)) {
         error_setg_errno(errp, errno, "xengnttab_unmap failed");
     }
 }
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 88d2ae3f0a..047463d89c 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -137,11 +137,11 @@ void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
 }
 
 void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr,
-                             unsigned int nr_refs)
+                             uint32_t *refs, unsigned int nr_refs)
 {
     assert(xendev->ops->flags & DEVOPS_FLAG_NEED_GNTDEV);
 
-    if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, nr_refs)) {
+    if (qemu_xen_gnttab_unmap(xendev->gnttabdev, ptr, refs, nr_refs)) {
         xen_pv_printf(xendev, 0, "xengnttab_unmap failed: %s\n",
                       strerror(errno));
     }
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
index eb5fb0a59f..73dabac8e5 100644
--- a/hw/xen/xen-operations.c
+++ b/hw/xen/xen-operations.c
@@ -200,6 +200,13 @@ static xengnttab_handle *libxengnttab_backend_open(void)
     return xengnttab_open(NULL, 0);
 }
 
+static int libxengnttab_backend_unmap(xengnttab_handle *xgt,
+                                      void *start_address, uint32_t *refs,
+                                      uint32_t count)
+{
+    return xengnttab_unmap(xgt, start_address, count);
+}
+
 
 static struct gnttab_backend_ops libxengnttab_backend_ops = {
     .features = XEN_GNTTAB_OP_FEATURE_MAP_MULTIPLE,
@@ -208,7 +215,7 @@ static struct gnttab_backend_ops libxengnttab_backend_ops = {
     .grant_copy = libxengnttab_fallback_grant_copy,
     .set_max_grants = xengnttab_set_max_grants,
     .map_refs = xengnttab_map_domain_grant_refs,
-    .unmap = xengnttab_unmap,
+    .unmap = libxengnttab_backend_unmap,
 };
 
 void setup_xen_backend_ops(void)
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 72d71d1eb7..5a90e79d53 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -102,7 +102,7 @@ void xen_device_set_max_grant_refs(XenDevice *xendev, unsigned int nr_refs,
 void *xen_device_map_grant_refs(XenDevice *xendev, uint32_t *refs,
                                 unsigned int nr_refs, int prot,
                                 Error **errp);
-void xen_device_unmap_grant_refs(XenDevice *xendev, void *map,
+void xen_device_unmap_grant_refs(XenDevice *xendev, void *map, uint32_t *refs,
                                  unsigned int nr_refs, Error **errp);
 
 typedef struct XenDeviceGrantCopySegment {
diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h
index 01c12952b7..ea84a9246f 100644
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -53,7 +53,7 @@ void xen_be_set_max_grant_refs(struct XenLegacyDevice *xendev,
 void *xen_be_map_grant_refs(struct XenLegacyDevice *xendev, uint32_t *refs,
                             unsigned int nr_refs, int prot);
 void xen_be_unmap_grant_refs(struct XenLegacyDevice *xendev, void *ptr,
-                             unsigned int nr_refs);
+                             uint32_t *refs, unsigned int nr_refs);
 
 int xen_be_copy_grant_refs(struct XenLegacyDevice *xendev,
                            bool to_domain, XenGrantCopySegment segs[],
@@ -66,9 +66,9 @@ static inline void *xen_be_map_grant_ref(struct XenLegacyDevice *xendev,
 }
 
 static inline void xen_be_unmap_grant_ref(struct XenLegacyDevice *xendev,
-                                          void *ptr)
+                                          void *ptr, uint32_t ref)
 {
-    return xen_be_unmap_grant_refs(xendev, ptr, 1);
+    return xen_be_unmap_grant_refs(xendev, ptr, &ref, 1);
 }
 
 /* actual backend drivers */
diff --git a/include/hw/xen/xen_backend_ops.h b/include/hw/xen/xen_backend_ops.h
index acb7838e63..3e9392d3fa 100644
--- a/include/hw/xen/xen_backend_ops.h
+++ b/include/hw/xen/xen_backend_ops.h
@@ -144,7 +144,8 @@ struct gnttab_backend_ops {
     int (*set_max_grants)(xengnttab_handle *xgt, uint32_t nr_grants);
     void *(*map_refs)(xengnttab_handle *xgt, uint32_t count, uint32_t domid,
                       uint32_t *refs, int prot);
-    int (*unmap)(xengnttab_handle *xgt, void *start_address, uint32_t count);
+    int (*unmap)(xengnttab_handle *xgt, void *start_address, uint32_t *refs,
+                 uint32_t count);
 };
 
 extern struct gnttab_backend_ops *xen_gnttab_ops;
@@ -197,13 +198,13 @@ static inline void *qemu_xen_gnttab_map_refs(xengnttab_handle *xgt, uint32_t cou
 }
 
 static inline int qemu_xen_gnttab_unmap(xengnttab_handle *xgt,
-                                        void *start_address,
+                                        void *start_address, uint32_t *refs,
                                         uint32_t count)
 {
     if (!xen_gnttab_ops) {
         return -ENOSYS;
     }
-    return xen_gnttab_ops->unmap(xgt, start_address, count);
+    return xen_gnttab_ops->unmap(xgt, start_address, refs, count);
 }
 
 void setup_xen_backend_ops(void);
-- 
2.39.0
[RFC PATCH v7bis 05/19] hw/xen: Add foreignmem operations to allow redirection to internal emulation
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paul Durrant <pdurrant@amazon.com>
---
 hw/char/xen_console.c            |  8 ++--
 hw/display/xenfb.c               | 20 +++++-----
 hw/xen/xen-operations.c          | 63 ++++++++++++++++++++++++++++++++
 include/hw/xen/xen_backend_ops.h | 26 +++++++++++++
 include/hw/xen/xen_common.h      | 13 -------
 softmmu/globals.c                |  1 +
 6 files changed, 105 insertions(+), 26 deletions(-)

diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index 19ad6c946a..e9cef3e1ef 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -237,9 +237,9 @@ static int con_initialise(struct XenLegacyDevice *xendev)
 
     if (!xendev->dev) {
         xen_pfn_t mfn = con->ring_ref;
-        con->sring = xenforeignmemory_map(xen_fmem, con->xendev.dom,
-                                          PROT_READ | PROT_WRITE,
-                                          1, &mfn, NULL);
+        con->sring = qemu_xen_foreignmem_map(con->xendev.dom, NULL,
+                                             PROT_READ | PROT_WRITE,
+                                             1, &mfn, NULL);
     } else {
         con->sring = xen_be_map_grant_ref(xendev, con->ring_ref,
                                           PROT_READ | PROT_WRITE);
@@ -269,7 +269,7 @@ static void con_disconnect(struct XenLegacyDevice *xendev)
 
     if (con->sring) {
         if (!xendev->dev) {
-            xenforeignmemory_unmap(xen_fmem, con->sring, 1);
+            qemu_xen_foreignmem_unmap(con->sring, 1);
         } else {
             xen_be_unmap_grant_ref(xendev, con->sring, con->ring_ref);
         }
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 260eb38a76..2c4016fcbd 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -98,8 +98,9 @@ static int common_bind(struct common *c)
     if (xenstore_read_fe_int(&c->xendev, "event-channel", &c->xendev.remote_port) == -1)
         return -1;
 
-    c->page = xenforeignmemory_map(xen_fmem, c->xendev.dom,
-                                   PROT_READ | PROT_WRITE, 1, &mfn, NULL);
+    c->page = qemu_xen_foreignmem_map(c->xendev.dom, NULL,
+                                      PROT_READ | PROT_WRITE, 1, &mfn,
+                                      NULL);
     if (c->page == NULL)
         return -1;
 
@@ -115,7 +116,7 @@ static void common_unbind(struct common *c)
 {
     xen_pv_unbind_evtchn(&c->xendev);
     if (c->page) {
-        xenforeignmemory_unmap(xen_fmem, c->page, 1);
+        qemu_xen_foreignmem_unmap(c->page, 1);
         c->page = NULL;
     }
 }
@@ -500,15 +501,16 @@ static int xenfb_map_fb(struct XenFB *xenfb)
     fbmfns = g_new0(xen_pfn_t, xenfb->fbpages);
 
     xenfb_copy_mfns(mode, n_fbdirs, pgmfns, pd);
-    map = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
-                               PROT_READ, n_fbdirs, pgmfns, NULL);
+    map = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL, PROT_READ,
+                                  n_fbdirs, pgmfns, NULL);
     if (map == NULL)
         goto out;
     xenfb_copy_mfns(mode, xenfb->fbpages, fbmfns, map);
-    xenforeignmemory_unmap(xen_fmem, map, n_fbdirs);
+    qemu_xen_foreignmem_unmap(map, n_fbdirs);
 
-    xenfb->pixels = xenforeignmemory_map(xen_fmem, xenfb->c.xendev.dom,
-            PROT_READ, xenfb->fbpages, fbmfns, NULL);
+    xenfb->pixels = qemu_xen_foreignmem_map(xenfb->c.xendev.dom, NULL,
+                                            PROT_READ, xenfb->fbpages,
+                                            fbmfns, NULL);
     if (xenfb->pixels == NULL)
         goto out;
 
@@ -927,7 +929,7 @@ static void fb_disconnect(struct XenLegacyDevice *xendev)
      *   Replacing the framebuffer with anonymous shared memory
      *   instead.  This releases the guest pages and keeps qemu happy.
      */
-    xenforeignmemory_unmap(xen_fmem, fb->pixels, fb->fbpages);
+    qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages);
     fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
                       -1, 0);
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
index 73dabac8e5..4c6b305cc4 100644
--- a/hw/xen/xen-operations.c
+++ b/hw/xen/xen-operations.c
@@ -22,6 +22,7 @@
  */
 #undef XC_WANT_COMPAT_EVTCHN_API
 #undef XC_WANT_COMPAT_GNTTAB_API
+#undef XC_WANT_COMPAT_MAP_FOREIGN_API
 
 #include <xenctrl.h>
 
@@ -56,10 +57,13 @@ typedef xc_gnttab xengnttab_handle;
 #define xengnttab_map_domain_grant_refs(h, c, d, r, p) \
     xc_gnttab_map_domain_grant_refs(h, c, d, r, p)
 
+typedef xc_interface xenforeignmemory_handle;
+
 #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
 
 #include <xenevtchn.h>
 #include <xengnttab.h>
+#include <xenforeignmemory.h>
 
 #endif
 
@@ -218,6 +222,64 @@ static struct gnttab_backend_ops libxengnttab_backend_ops = {
     .unmap = libxengnttab_backend_unmap,
 };
 
+#if CONFIG_XEN_CTRL_INTERFACE_VERSION < 40701
+
+static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot,
+                                          size_t pages,
+                                          uint64_t *pfns, int *errs)
+{
+    if (errs)
+        return xc_map_foreign_bulk(xen_xc, dom, prot, arr, err, pages);
+    else
+        return xc_map_foreign_pages(xen_xc, dom, prot, arr, pages);
+}
+
+static int libxenforeignmem_backend_unmap(void *addr, size_t pages)
+{
+    return munmap(addr, pages * XC_PAGE_SIZE);
+}
+
+#else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
+
+static void *libxenforeignmem_backend_map(uint32_t dom, void *addr, int prot,
+                                          size_t pages,
+                                          uint64_t *pfns, int *errs)
+{
+    xen_pfn_t *xen_pfns;
+    void *ptr = NULL;
+
+    if (sizeof(pfns[0]) == sizeof(xen_pfn_t)) {
+        xen_pfns = (xen_pfn_t *)pfns;
+    } else {
+        int i;
+        xen_pfns = g_new0(xen_pfn_t, pages);
+        for (i = 0; i < pages; i++) {
+            xen_pfns[i] = pfns[i];
+            if (xen_pfns[i] != pfns[i]) {
+                goto out;
+            }
+        }
+    }
+    ptr = xenforeignmemory_map2(xen_fmem, dom, addr, prot, 0, pages, xen_pfns, errs);
+ out:
+    if ((void *)xen_pfns != (void *)pfns) {
+        g_free(xen_pfns);
+    }
+    return ptr;
+}
+
+static int libxenforeignmem_backend_unmap(void *addr, size_t pages)
+{
+    return xenforeignmemory_unmap(xen_fmem, addr, pages);
+}
+
+#endif
+
+struct foreignmem_backend_ops libxenforeignmem_backend_ops = {
+    .map = libxenforeignmem_backend_map,
+    .unmap = libxenforeignmem_backend_unmap,
+};
+
 void setup_xen_backend_ops(void)
 {
 #if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800
@@ -232,4 +294,5 @@ void setup_xen_backend_ops(void)
 #endif
     xen_evtchn_ops = &libxenevtchn_backend_ops;
     xen_gnttab_ops = &libxengnttab_backend_ops;
+    xen_foreignmem_ops = &libxenforeignmem_backend_ops;
 }
diff --git a/include/hw/xen/xen_backend_ops.h b/include/hw/xen/xen_backend_ops.h
index 3e9392d3fa..5bbe7bbc01 100644
--- a/include/hw/xen/xen_backend_ops.h
+++ b/include/hw/xen/xen_backend_ops.h
@@ -207,6 +207,32 @@ static inline int qemu_xen_gnttab_unmap(xengnttab_handle *xgt,
     return xen_gnttab_ops->unmap(xgt, start_address, refs, count);
 }
 
+struct foreignmem_backend_ops {
+    void *(*map)(uint32_t dom, void *addr, int prot, size_t pages,
+              uint64_t *pfns, int *errs);
+    int (*unmap)(void *addr, size_t pages);
+};
+
+extern struct foreignmem_backend_ops *xen_foreignmem_ops;
+
+static inline void *qemu_xen_foreignmem_map(uint32_t dom, void *addr, int prot,
+                                            size_t pages,
+                                            uint64_t *pfns, int *errs)
+{
+    if (!xen_foreignmem_ops) {
+        return NULL;
+    }
+    return xen_foreignmem_ops->map(dom, addr, prot, pages, pfns, errs);
+}
+
+static inline int qemu_xen_foreignmem_unmap(void *addr, size_t pages)
+{
+    if (!xen_foreignmem_ops) {
+        return -ENOSYS;
+    }
+    return xen_foreignmem_ops->unmap(addr, pages);
+}
+
 void setup_xen_backend_ops(void);
 
 #endif /* QEMU_XEN_BACKEND_OPS_H */
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index d4d10d3ff1..632ce617cc 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -32,19 +32,6 @@ typedef xc_interface xenforeignmemory_handle;
 #define xenforeignmemory_open(l, f) xen_xc
 #define xenforeignmemory_close(h)
 
-static inline void *xenforeignmemory_map(xc_interface *h, uint32_t dom,
-                                         int prot, size_t pages,
-                                         const xen_pfn_t arr[/*pages*/],
-                                         int err[/*pages*/])
-{
-    if (err)
-        return xc_map_foreign_bulk(h, dom, prot, arr, err, pages);
-    else
-        return xc_map_foreign_pages(h, dom, prot, arr, pages);
-}
-
-#define xenforeignmemory_unmap(h, p, s) munmap(p, s * XC_PAGE_SIZE)
-
 #else /* CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40701 */
 
 #include <xenforeignmemory.h>
diff --git a/softmmu/globals.c b/softmmu/globals.c
index 23bb27f0f6..dda32986f7 100644
--- a/softmmu/globals.c
+++ b/softmmu/globals.c
@@ -67,3 +67,4 @@ enum xen_mode xen_mode = XEN_DISABLED;
 bool xen_domid_restrict;
 struct evtchn_backend_ops *xen_evtchn_ops;
 struct gnttab_backend_ops *xen_gnttab_ops;
+struct foreignmem_backend_ops *xen_foreignmem_ops;
-- 
2.39.0
[RFC PATCH v7bis 06/19] hw/xen: Add xenstore operations to allow redirection to internal emulation
Posted by David Woodhouse 1 year, 3 months ago
From: Paul Durrant <pdurrant@amazon.com>

Signed-off-by: Paul Durrant <pdurrant@amazon.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 accel/xen/xen-all.c                 |  11 +-
 hw/char/xen_console.c               |   2 +-
 hw/xen/xen-bus-helper.c             |  61 +++----
 hw/xen/xen-bus.c                    | 258 ++++------------------------
 hw/xen/xen-legacy-backend.c         | 121 +++++++------
 hw/xen/xen-operations.c             | 189 ++++++++++++++++++++
 hw/xen/xen_devconfig.c              |   4 +-
 hw/xen/xen_pt_graphics.c            |   1 -
 hw/xen/xen_pvdev.c                  |  49 +-----
 include/hw/xen/xen-bus-helper.h     |  25 ++-
 include/hw/xen/xen-bus.h            |  17 +-
 include/hw/xen/xen-legacy-backend.h |   6 +-
 include/hw/xen/xen_backend_ops.h    | 155 +++++++++++++++++
 include/hw/xen/xen_common.h         |   1 -
 include/hw/xen/xen_pvdev.h          |   2 +-
 softmmu/globals.c                   |   1 +
 16 files changed, 502 insertions(+), 401 deletions(-)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 2329556595..6df7b4ff34 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -100,12 +100,15 @@ void xenstore_store_pv_console_info(int i, Chardev *chr)
 }
 
 
-static void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
+static void xenstore_record_dm_state(const char *state)
 {
+    struct xs_handle *xs;
     char path[50];
 
+    /* We now have everything we need to set the xenstore entry. */
+    xs = xs_open(0);
     if (xs == NULL) {
-        error_report("xenstore connection not initialized");
+        fprintf(stderr, "Could not contact XenStore\n");
         exit(1);
     }
 
@@ -119,6 +122,8 @@ static void xenstore_record_dm_state(struct xs_handle *xs, const char *state)
         error_report("error recording dm state");
         exit(1);
     }
+
+    xs_close(xs);
 }
 
 
@@ -127,7 +132,7 @@ static void xen_change_state_handler(void *opaque, bool running,
 {
     if (running) {
         /* record state running */
-        xenstore_record_dm_state(xenstore, "running");
+        xenstore_record_dm_state("running");
     }
 }
 
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index e9cef3e1ef..ad8638a86d 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -181,7 +181,7 @@ static int con_init(struct XenLegacyDevice *xendev)
     const char *output;
 
     /* setup */
-    dom = xs_get_domain_path(xenstore, con->xendev.dom);
+    dom = qemu_xen_xs_get_domain_path(xenstore, con->xendev.dom);
     if (!xendev->dev) {
         snprintf(con->console, sizeof(con->console), "%s/console", dom);
     } else {
diff --git a/hw/xen/xen-bus-helper.c b/hw/xen/xen-bus-helper.c
index 5a1e12b374..db1e64e0fb 100644
--- a/hw/xen/xen-bus-helper.c
+++ b/hw/xen/xen-bus-helper.c
@@ -10,6 +10,7 @@
 #include "hw/xen/xen-bus.h"
 #include "hw/xen/xen-bus-helper.h"
 #include "qapi/error.h"
+#include "trace.h"
 
 #include <glib/gprintf.h>
 
@@ -46,34 +47,28 @@ const char *xs_strstate(enum xenbus_state state)
     return "INVALID";
 }
 
-void xs_node_create(struct xs_handle *xsh, xs_transaction_t tid,
-                    const char *node, struct xs_permissions perms[],
-                    unsigned int nr_perms, Error **errp)
+void xs_node_create(struct qemu_xs_handle *h, xs_transaction_t tid,
+                    const char *node, unsigned int owner, unsigned int domid,
+                    unsigned int perms, Error **errp)
 {
     trace_xs_node_create(node);
 
-    if (!xs_write(xsh, tid, node, "", 0)) {
+    if (!qemu_xen_xs_create(h, tid, owner, domid, perms, node)) {
         error_setg_errno(errp, errno, "failed to create node '%s'", node);
-        return;
-    }
-
-    if (!xs_set_permissions(xsh, tid, node, perms, nr_perms)) {
-        error_setg_errno(errp, errno, "failed to set node '%s' permissions",
-                         node);
     }
 }
 
-void xs_node_destroy(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_destroy(struct qemu_xs_handle *h, xs_transaction_t tid,
                      const char *node, Error **errp)
 {
     trace_xs_node_destroy(node);
 
-    if (!xs_rm(xsh, tid, node)) {
+    if (!qemu_xen_xs_destroy(h, tid, node)) {
         error_setg_errno(errp, errno, "failed to destroy node '%s'", node);
     }
 }
 
-void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
+void xs_node_vprintf(struct qemu_xs_handle *h, xs_transaction_t tid,
                      const char *node, const char *key, Error **errp,
                      const char *fmt, va_list ap)
 {
@@ -86,7 +81,7 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
 
     trace_xs_node_vprintf(path, value);
 
-    if (!xs_write(xsh, tid, path, value, len)) {
+    if (!qemu_xen_xs_write(h, tid, path, value, len)) {
         error_setg_errno(errp, errno, "failed to write '%s' to '%s'",
                          value, path);
     }
@@ -95,18 +90,18 @@ void xs_node_vprintf(struct xs_handle *xsh, xs_transaction_t tid,
     g_free(path);
 }
 
-void xs_node_printf(struct xs_handle *xsh,  xs_transaction_t tid,
+void xs_node_printf(struct qemu_xs_handle *h,  xs_transaction_t tid,
                     const char *node, const char *key, Error **errp,
                     const char *fmt, ...)
 {
     va_list ap;
 
     va_start(ap, fmt);
-    xs_node_vprintf(xsh, tid, node, key, errp, fmt, ap);
+    xs_node_vprintf(h, tid, node, key, errp, fmt, ap);
     va_end(ap);
 }
 
-int xs_node_vscanf(struct xs_handle *xsh,  xs_transaction_t tid,
+int xs_node_vscanf(struct qemu_xs_handle *h,  xs_transaction_t tid,
                    const char *node, const char *key, Error **errp,
                    const char *fmt, va_list ap)
 {
@@ -115,7 +110,7 @@ int xs_node_vscanf(struct xs_handle *xsh,  xs_transaction_t tid,
 
     path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
         g_strdup(key);
-    value = xs_read(xsh, tid, path, NULL);
+    value = qemu_xen_xs_read(h, tid, path, NULL);
 
     trace_xs_node_vscanf(path, value);
 
@@ -133,7 +128,7 @@ int xs_node_vscanf(struct xs_handle *xsh,  xs_transaction_t tid,
     return rc;
 }
 
-int xs_node_scanf(struct xs_handle *xsh,  xs_transaction_t tid,
+int xs_node_scanf(struct qemu_xs_handle *h,  xs_transaction_t tid,
                   const char *node, const char *key, Error **errp,
                   const char *fmt, ...)
 {
@@ -141,42 +136,34 @@ int xs_node_scanf(struct xs_handle *xsh,  xs_transaction_t tid,
     int rc;
 
     va_start(ap, fmt);
-    rc = xs_node_vscanf(xsh, tid, node, key, errp, fmt, ap);
+    rc = xs_node_vscanf(h, tid, node, key, errp, fmt, ap);
     va_end(ap);
 
     return rc;
 }
 
-void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key,
-                   char *token, Error **errp)
+struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node, const char *key,
+                                    xs_watch_fn fn, void *opaque, Error **errp)
 {
     char *path;
+    struct qemu_xs_watch *w;
 
     path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
         g_strdup(key);
 
     trace_xs_node_watch(path);
 
-    if (!xs_watch(xsh, path, token)) {
+    w = qemu_xen_xs_watch(h, path, fn, opaque);
+    if (!w) {
         error_setg_errno(errp, errno, "failed to watch node '%s'", path);
     }
 
     g_free(path);
+
+    return w;
 }
 
-void xs_node_unwatch(struct xs_handle *xsh, const char *node,
-                     const char *key, const char *token, Error **errp)
+void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
 {
-    char *path;
-
-    path = (strlen(node) != 0) ? g_strdup_printf("%s/%s", node, key) :
-        g_strdup(key);
-
-    trace_xs_node_unwatch(path);
-
-    if (!xs_unwatch(xsh, path, token)) {
-        error_setg_errno(errp, errno, "failed to unwatch node '%s'", path);
-    }
-
-    g_free(path);
+    qemu_xen_xs_unwatch(h, w);
 }
diff --git a/hw/xen/xen-bus.c b/hw/xen/xen-bus.c
index aee6a8c9b0..e8d9adeb3e 100644
--- a/hw/xen/xen-bus.c
+++ b/hw/xen/xen-bus.c
@@ -62,7 +62,7 @@ static void xen_device_unplug(XenDevice *xendev, Error **errp)
 
     /* Mimic the way the Xen toolstack does an unplug */
 again:
-    tid = xs_transaction_start(xenbus->xsh);
+    tid = qemu_xen_xs_transaction_start(xenbus->xsh);
     if (tid == XBT_NULL) {
         error_setg_errno(errp, errno, "failed xs_transaction_start");
         return;
@@ -80,7 +80,7 @@ again:
         goto abort;
     }
 
-    if (!xs_transaction_end(xenbus->xsh, tid, false)) {
+    if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) {
         if (errno == EAGAIN) {
             goto again;
         }
@@ -95,7 +95,7 @@ abort:
      * We only abort if there is already a failure so ignore any error
      * from ending the transaction.
      */
-    xs_transaction_end(xenbus->xsh, tid, true);
+    qemu_xen_xs_transaction_end(xenbus->xsh, tid, true);
 }
 
 static void xen_bus_print_dev(Monitor *mon, DeviceState *dev, int indent)
@@ -111,143 +111,6 @@ static char *xen_bus_get_dev_path(DeviceState *dev)
     return xen_device_get_backend_path(XEN_DEVICE(dev));
 }
 
-struct XenWatch {
-    char *node, *key;
-    char *token;
-    XenWatchHandler handler;
-    void *opaque;
-    Notifier notifier;
-};
-
-static void watch_notify(Notifier *n, void *data)
-{
-    XenWatch *watch = container_of(n, XenWatch, notifier);
-    const char *token = data;
-
-    if (!strcmp(watch->token, token)) {
-        watch->handler(watch->opaque);
-    }
-}
-
-static XenWatch *new_watch(const char *node, const char *key,
-                           XenWatchHandler handler, void *opaque)
-{
-    XenWatch *watch = g_new0(XenWatch, 1);
-    QemuUUID uuid;
-
-    qemu_uuid_generate(&uuid);
-
-    watch->token = qemu_uuid_unparse_strdup(&uuid);
-    watch->node = g_strdup(node);
-    watch->key = g_strdup(key);
-    watch->handler = handler;
-    watch->opaque = opaque;
-    watch->notifier.notify = watch_notify;
-
-    return watch;
-}
-
-static void free_watch(XenWatch *watch)
-{
-    g_free(watch->token);
-    g_free(watch->key);
-    g_free(watch->node);
-
-    g_free(watch);
-}
-
-struct XenWatchList {
-    struct xs_handle *xsh;
-    NotifierList notifiers;
-};
-
-static void watch_list_event(void *opaque)
-{
-    XenWatchList *watch_list = opaque;
-    char **v;
-    const char *token;
-
-    v = xs_check_watch(watch_list->xsh);
-    if (!v) {
-        return;
-    }
-
-    token = v[XS_WATCH_TOKEN];
-
-    notifier_list_notify(&watch_list->notifiers, (void *)token);
-
-    free(v);
-}
-
-static XenWatchList *watch_list_create(struct xs_handle *xsh)
-{
-    XenWatchList *watch_list = g_new0(XenWatchList, 1);
-
-    g_assert(xsh);
-
-    watch_list->xsh = xsh;
-    notifier_list_init(&watch_list->notifiers);
-    qemu_set_fd_handler(xs_fileno(watch_list->xsh), watch_list_event, NULL,
-                        watch_list);
-
-    return watch_list;
-}
-
-static void watch_list_destroy(XenWatchList *watch_list)
-{
-    g_assert(notifier_list_empty(&watch_list->notifiers));
-    qemu_set_fd_handler(xs_fileno(watch_list->xsh), NULL, NULL, NULL);
-    g_free(watch_list);
-}
-
-static XenWatch *watch_list_add(XenWatchList *watch_list, const char *node,
-                                const char *key, XenWatchHandler handler,
-                                void *opaque, Error **errp)
-{
-    ERRP_GUARD();
-    XenWatch *watch = new_watch(node, key, handler, opaque);
-
-    notifier_list_add(&watch_list->notifiers, &watch->notifier);
-
-    xs_node_watch(watch_list->xsh, node, key, watch->token, errp);
-    if (*errp) {
-        notifier_remove(&watch->notifier);
-        free_watch(watch);
-
-        return NULL;
-    }
-
-    return watch;
-}
-
-static void watch_list_remove(XenWatchList *watch_list, XenWatch *watch,
-                              Error **errp)
-{
-    xs_node_unwatch(watch_list->xsh, watch->node, watch->key, watch->token,
-                    errp);
-
-    notifier_remove(&watch->notifier);
-    free_watch(watch);
-}
-
-static XenWatch *xen_bus_add_watch(XenBus *xenbus, const char *node,
-                                   const char *key, XenWatchHandler handler,
-                                   Error **errp)
-{
-    trace_xen_bus_add_watch(node, key);
-
-    return watch_list_add(xenbus->watch_list, node, key, handler, xenbus,
-                          errp);
-}
-
-static void xen_bus_remove_watch(XenBus *xenbus, XenWatch *watch,
-                                 Error **errp)
-{
-    trace_xen_bus_remove_watch(watch->node, watch->key);
-
-    watch_list_remove(xenbus->watch_list, watch, errp);
-}
-
 static void xen_bus_backend_create(XenBus *xenbus, const char *type,
                                    const char *name, char *path,
                                    Error **errp)
@@ -261,15 +124,15 @@ static void xen_bus_backend_create(XenBus *xenbus, const char *type,
     trace_xen_bus_backend_create(type, path);
 
 again:
-    tid = xs_transaction_start(xenbus->xsh);
+    tid = qemu_xen_xs_transaction_start(xenbus->xsh);
     if (tid == XBT_NULL) {
         error_setg(errp, "failed xs_transaction_start");
         return;
     }
 
-    key = xs_directory(xenbus->xsh, tid, path, &n);
+    key = qemu_xen_xs_directory(xenbus->xsh, tid, path, &n);
     if (!key) {
-        if (!xs_transaction_end(xenbus->xsh, tid, true)) {
+        if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, true)) {
             error_setg_errno(errp, errno, "failed xs_transaction_end");
         }
         return;
@@ -300,7 +163,7 @@ again:
 
     free(key);
 
-    if (!xs_transaction_end(xenbus->xsh, tid, false)) {
+    if (!qemu_xen_xs_transaction_end(xenbus->xsh, tid, false)) {
         qobject_unref(opts);
 
         if (errno == EAGAIN) {
@@ -327,7 +190,7 @@ static void xen_bus_type_enumerate(XenBus *xenbus, const char *type)
 
     trace_xen_bus_type_enumerate(type);
 
-    backend = xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n);
+    backend = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, domain_path, &n);
     if (!backend) {
         goto out;
     }
@@ -372,7 +235,7 @@ static void xen_bus_enumerate(XenBus *xenbus)
 
     trace_xen_bus_enumerate();
 
-    type = xs_directory(xenbus->xsh, XBT_NULL, "backend", &n);
+    type = qemu_xen_xs_directory(xenbus->xsh, XBT_NULL, "backend", &n);
     if (!type) {
         return;
     }
@@ -415,7 +278,7 @@ static void xen_bus_cleanup(XenBus *xenbus)
     }
 }
 
-static void xen_bus_backend_changed(void *opaque)
+static void xen_bus_backend_changed(void *opaque, const char *path)
 {
     XenBus *xenbus = opaque;
 
@@ -434,7 +297,7 @@ static void xen_bus_unrealize(BusState *bus)
 
         for (i = 0; i < xenbus->backend_types; i++) {
             if (xenbus->backend_watch[i]) {
-                xen_bus_remove_watch(xenbus, xenbus->backend_watch[i], NULL);
+                xs_node_unwatch(xenbus->xsh, xenbus->backend_watch[i]);
             }
         }
 
@@ -442,13 +305,8 @@ static void xen_bus_unrealize(BusState *bus)
         xenbus->backend_watch = NULL;
     }
 
-    if (xenbus->watch_list) {
-        watch_list_destroy(xenbus->watch_list);
-        xenbus->watch_list = NULL;
-    }
-
     if (xenbus->xsh) {
-        xs_close(xenbus->xsh);
+        qemu_xen_xs_close(xenbus->xsh);
     }
 }
 
@@ -463,7 +321,7 @@ static void xen_bus_realize(BusState *bus, Error **errp)
 
     trace_xen_bus_realize();
 
-    xenbus->xsh = xs_open(0);
+    xenbus->xsh = qemu_xen_xs_open();
     if (!xenbus->xsh) {
         error_setg_errno(errp, errno, "failed xs_open");
         goto fail;
@@ -476,19 +334,16 @@ static void xen_bus_realize(BusState *bus, Error **errp)
         xenbus->backend_id = 0; /* Assume lack of node means dom0 */
     }
 
-    xenbus->watch_list = watch_list_create(xenbus->xsh);
-
     module_call_init(MODULE_INIT_XEN_BACKEND);
 
     type = xen_backend_get_types(&xenbus->backend_types);
-    xenbus->backend_watch = g_new(XenWatch *, xenbus->backend_types);
+    xenbus->backend_watch = g_new(struct qemu_xs_watch *, xenbus->backend_types);
 
     for (i = 0; i < xenbus->backend_types; i++) {
         char *node = g_strdup_printf("backend/%s", type[i]);
 
         xenbus->backend_watch[i] =
-            xen_bus_add_watch(xenbus, node, key, xen_bus_backend_changed,
-                              &local_err);
+            xs_node_watch(xenbus->xsh, node, key, xen_bus_backend_changed, xenbus, &local_err);
         if (local_err) {
             /* This need not be treated as a hard error so don't propagate */
             error_reportf_err(local_err,
@@ -631,7 +486,7 @@ static bool xen_device_frontend_is_active(XenDevice *xendev)
     }
 }
 
-static void xen_device_backend_changed(void *opaque)
+static void xen_device_backend_changed(void *opaque, const char *path)
 {
     XenDevice *xendev = opaque;
     const char *type = object_get_typename(OBJECT(xendev));
@@ -685,66 +540,35 @@ static void xen_device_backend_changed(void *opaque)
     }
 }
 
-static XenWatch *xen_device_add_watch(XenDevice *xendev, const char *node,
-                                      const char *key,
-                                      XenWatchHandler handler,
-                                      Error **errp)
-{
-    const char *type = object_get_typename(OBJECT(xendev));
-
-    trace_xen_device_add_watch(type, xendev->name, node, key);
-
-    return watch_list_add(xendev->watch_list, node, key, handler, xendev,
-                          errp);
-}
-
-static void xen_device_remove_watch(XenDevice *xendev, XenWatch *watch,
-                                    Error **errp)
-{
-    const char *type = object_get_typename(OBJECT(xendev));
-
-    trace_xen_device_remove_watch(type, xendev->name, watch->node,
-                                  watch->key);
-
-    watch_list_remove(xendev->watch_list, watch, errp);
-}
-
-
 static void xen_device_backend_create(XenDevice *xendev, Error **errp)
 {
     ERRP_GUARD();
     XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
-    struct xs_permissions perms[2];
 
     xendev->backend_path = xen_device_get_backend_path(xendev);
 
-    perms[0].id = xenbus->backend_id;
-    perms[0].perms = XS_PERM_NONE;
-    perms[1].id = xendev->frontend_id;
-    perms[1].perms = XS_PERM_READ;
-
     g_assert(xenbus->xsh);
 
-    xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, perms,
-                   ARRAY_SIZE(perms), errp);
+    xs_node_create(xenbus->xsh, XBT_NULL, xendev->backend_path, xenbus->backend_id,
+                   xendev->frontend_id, XS_PERM_READ, errp);
     if (*errp) {
         error_prepend(errp, "failed to create backend: ");
         return;
     }
 
     xendev->backend_state_watch =
-        xen_device_add_watch(xendev, xendev->backend_path,
-                             "state", xen_device_backend_changed,
-                             errp);
+        xs_node_watch(xendev->xsh, xendev->backend_path,
+                      "state", xen_device_backend_changed, xendev,
+                      errp);
     if (*errp) {
         error_prepend(errp, "failed to watch backend state: ");
         return;
     }
 
     xendev->backend_online_watch =
-        xen_device_add_watch(xendev, xendev->backend_path,
-                             "online", xen_device_backend_changed,
-                             errp);
+        xs_node_watch(xendev->xsh, xendev->backend_path,
+                      "online", xen_device_backend_changed, xendev,
+                      errp);
     if (*errp) {
         error_prepend(errp, "failed to watch backend online: ");
         return;
@@ -757,12 +581,12 @@ static void xen_device_backend_destroy(XenDevice *xendev)
     Error *local_err = NULL;
 
     if (xendev->backend_online_watch) {
-        xen_device_remove_watch(xendev, xendev->backend_online_watch, NULL);
+        xs_node_unwatch(xendev->xsh, xendev->backend_online_watch);
         xendev->backend_online_watch = NULL;
     }
 
     if (xendev->backend_state_watch) {
-        xen_device_remove_watch(xendev, xendev->backend_state_watch, NULL);
+        xs_node_unwatch(xendev->xsh, xendev->backend_state_watch);
         xendev->backend_state_watch = NULL;
     }
 
@@ -837,7 +661,7 @@ static void xen_device_frontend_set_state(XenDevice *xendev,
     }
 }
 
-static void xen_device_frontend_changed(void *opaque)
+static void xen_device_frontend_changed(void *opaque, const char *path)
 {
     XenDevice *xendev = opaque;
     XenDeviceClass *xendev_class = XEN_DEVICE_GET_CLASS(xendev);
@@ -885,7 +709,6 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
 {
     ERRP_GUARD();
     XenBus *xenbus = XEN_BUS(qdev_get_parent_bus(DEVICE(xendev)));
-    struct xs_permissions perms[2];
 
     xendev->frontend_path = xen_device_get_frontend_path(xendev);
 
@@ -894,15 +717,10 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
      * toolstack.
      */
     if (!xen_device_frontend_exists(xendev)) {
-        perms[0].id = xendev->frontend_id;
-        perms[0].perms = XS_PERM_NONE;
-        perms[1].id = xenbus->backend_id;
-        perms[1].perms = XS_PERM_READ | XS_PERM_WRITE;
-
         g_assert(xenbus->xsh);
 
-        xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, perms,
-                       ARRAY_SIZE(perms), errp);
+        xs_node_create(xenbus->xsh, XBT_NULL, xendev->frontend_path, xendev->frontend_id,
+                       xenbus->backend_id, XS_PERM_READ | XS_PERM_WRITE, errp);
         if (*errp) {
             error_prepend(errp, "failed to create frontend: ");
             return;
@@ -910,8 +728,8 @@ static void xen_device_frontend_create(XenDevice *xendev, Error **errp)
     }
 
     xendev->frontend_state_watch =
-        xen_device_add_watch(xendev, xendev->frontend_path, "state",
-                             xen_device_frontend_changed, errp);
+        xs_node_watch(xendev->xsh, xendev->frontend_path, "state",
+                      xen_device_frontend_changed, xendev, errp);
     if (*errp) {
         error_prepend(errp, "failed to watch frontend state: ");
     }
@@ -923,8 +741,7 @@ static void xen_device_frontend_destroy(XenDevice *xendev)
     Error *local_err = NULL;
 
     if (xendev->frontend_state_watch) {
-        xen_device_remove_watch(xendev, xendev->frontend_state_watch,
-                                NULL);
+        xs_node_unwatch(xendev->xsh, xendev->frontend_state_watch);
         xendev->frontend_state_watch = NULL;
     }
 
@@ -1146,13 +963,8 @@ static void xen_device_unrealize(DeviceState *dev)
         xendev->xgth = NULL;
     }
 
-    if (xendev->watch_list) {
-        watch_list_destroy(xendev->watch_list);
-        xendev->watch_list = NULL;
-    }
-
     if (xendev->xsh) {
-        xs_close(xendev->xsh);
+        qemu_xen_xs_close(xendev->xsh);
         xendev->xsh = NULL;
     }
 
@@ -1197,14 +1009,12 @@ static void xen_device_realize(DeviceState *dev, Error **errp)
 
     trace_xen_device_realize(type, xendev->name);
 
-    xendev->xsh = xs_open(0);
+    xendev->xsh = qemu_xen_xs_open();
     if (!xendev->xsh) {
         error_setg_errno(errp, errno, "failed xs_open");
         goto unrealize;
     }
 
-    xendev->watch_list = watch_list_create(xendev->xsh);
-
     xendev->xgth = qemu_xen_gnttab_open();
     if (!xendev->xgth) {
         error_setg_errno(errp, errno, "failed xengnttab_open");
diff --git a/hw/xen/xen-legacy-backend.c b/hw/xen/xen-legacy-backend.c
index 047463d89c..96d0941a7c 100644
--- a/hw/xen/xen-legacy-backend.c
+++ b/hw/xen/xen-legacy-backend.c
@@ -39,7 +39,7 @@ BusState *xen_sysbus;
 /* ------------------------------------------------------------- */
 
 /* public */
-struct xs_handle *xenstore;
+struct qemu_xs_handle *xenstore;
 const char *xen_protocol;
 
 /* private */
@@ -274,6 +274,25 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev,
     }
 }
 
+static void xenstore_update_fe(void *opaque, const char *watch)
+{
+    struct XenLegacyDevice *xendev = opaque;
+    const char *node;
+    unsigned int len;
+
+    len = strlen(xendev->fe);
+    if (strncmp(xendev->fe, watch, len) != 0) {
+        return;
+    }
+    if (watch[len] != '/') {
+        return;
+    }
+    node = watch + len + 1;
+
+    xen_be_frontend_changed(xendev, node);
+    xen_be_check_state(xendev);
+}
+
 /* ------------------------------------------------------------- */
 /* Check for possible state transitions and perform them.        */
 
@@ -287,7 +306,6 @@ static void xen_be_frontend_changed(struct XenLegacyDevice *xendev,
  */
 static int xen_be_try_setup(struct XenLegacyDevice *xendev)
 {
-    char token[XEN_BUFSIZE];
     int be_state;
 
     if (xenstore_read_be_int(xendev, "state", &be_state) == -1) {
@@ -308,8 +326,8 @@ static int xen_be_try_setup(struct XenLegacyDevice *xendev)
     }
 
     /* setup frontend watch */
-    snprintf(token, sizeof(token), "fe:%p", xendev);
-    if (!xs_watch(xenstore, xendev->fe, token)) {
+    xendev->watch = qemu_xen_xs_watch(xenstore, xendev->fe, xenstore_update_fe, xendev);
+    if (!xendev->watch) {
         xen_pv_printf(xendev, 0, "watching frontend path (%s) failed\n",
                       xendev->fe);
         return -1;
@@ -498,46 +516,20 @@ void xen_be_check_state(struct XenLegacyDevice *xendev)
 
 /* ------------------------------------------------------------- */
 
-static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops)
-{
-    struct XenLegacyDevice *xendev;
-    char path[XEN_BUFSIZE], token[XEN_BUFSIZE];
-    char **dev = NULL;
-    unsigned int cdev, j;
-
-    /* setup watch */
-    snprintf(token, sizeof(token), "be:%p:%d:%p", type, dom, ops);
-    snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
-    if (!xs_watch(xenstore, path, token)) {
-        xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n",
-                      path);
-        return -1;
-    }
-
-    /* look for backends */
-    dev = xs_directory(xenstore, 0, path, &cdev);
-    if (!dev) {
-        return 0;
-    }
-    for (j = 0; j < cdev; j++) {
-        xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops);
-        if (xendev == NULL) {
-            continue;
-        }
-        xen_be_check_state(xendev);
-    }
-    free(dev);
-    return 0;
-}
+struct xenstore_be {
+    const char *type;
+    int dom;
+    struct XenDevOps *ops;
+};
 
-void xenstore_update_be(char *watch, char *type, int dom,
-                        struct XenDevOps *ops)
+static void xenstore_update_be(void *opaque, const char *watch)
 {
+    struct xenstore_be *be = opaque;
     struct XenLegacyDevice *xendev;
     char path[XEN_BUFSIZE], *bepath;
     unsigned int len, dev;
 
-    len = snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
+    len = snprintf(path, sizeof(path), "backend/%s/%d", be->type, be->dom);
     if (strncmp(path, watch, len) != 0) {
         return;
     }
@@ -551,9 +543,9 @@ void xenstore_update_be(char *watch, char *type, int dom,
         return;
     }
 
-    xendev = xen_be_get_xendev(type, dom, dev, ops);
+    xendev = xen_be_get_xendev(be->type, be->dom, dev, be->ops);
     if (xendev != NULL) {
-        bepath = xs_read(xenstore, 0, xendev->be, &len);
+        bepath = qemu_xen_xs_read(xenstore, 0, xendev->be, &len);
         if (bepath == NULL) {
             xen_pv_del_xendev(xendev);
         } else {
@@ -564,35 +556,51 @@ void xenstore_update_be(char *watch, char *type, int dom,
     }
 }
 
-void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev)
+static int xenstore_scan(const char *type, int dom, struct XenDevOps *ops)
 {
-    char *node;
-    unsigned int len;
+    struct XenLegacyDevice *xendev;
+    char path[XEN_BUFSIZE];
+    struct xenstore_be *be = g_new0(struct xenstore_be, 1);
+    char **dev = NULL;
+    unsigned int cdev, j;
 
-    len = strlen(xendev->fe);
-    if (strncmp(xendev->fe, watch, len) != 0) {
-        return;
-    }
-    if (watch[len] != '/') {
-        return;
+    /* setup watch */
+    be->type = type;
+    be->dom = dom;
+    be->ops = ops;
+    snprintf(path, sizeof(path), "backend/%s/%d", type, dom);
+    if (!qemu_xen_xs_watch(xenstore, path, xenstore_update_be, be)) {
+        xen_pv_printf(NULL, 0, "xen be: watching backend path (%s) failed\n",
+                      path);
+        return -1;
     }
-    node = watch + len + 1;
 
-    xen_be_frontend_changed(xendev, node);
-    xen_be_check_state(xendev);
+    /* look for backends */
+    dev = qemu_xen_xs_directory(xenstore, 0, path, &cdev);
+    if (!dev) {
+        return 0;
+    }
+    for (j = 0; j < cdev; j++) {
+        xendev = xen_be_get_xendev(type, dom, atoi(dev[j]), ops);
+        if (xendev == NULL) {
+            continue;
+        }
+        xen_be_check_state(xendev);
+    }
+    free(dev);
+    return 0;
 }
+
 /* -------------------------------------------------------------------- */
 
 int xen_be_init(void)
 {
-    xenstore = xs_daemon_open();
+    xenstore = qemu_xen_xs_open();
     if (!xenstore) {
         xen_pv_printf(NULL, 0, "can't connect to xenstored\n");
         return -1;
     }
 
-    qemu_set_fd_handler(xs_fileno(xenstore), xenstore_update, NULL, NULL);
-
     if (xen_evtchn_ops == NULL || xen_gnttab_ops == NULL) {
         /* Check if xen_init() have been called */
         goto err;
@@ -606,8 +614,7 @@ int xen_be_init(void)
     return 0;
 
 err:
-    qemu_set_fd_handler(xs_fileno(xenstore), NULL, NULL, NULL);
-    xs_daemon_close(xenstore);
+    qemu_xen_xs_close(xenstore);
     xenstore = NULL;
 
     return -1;
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
index 4c6b305cc4..b2b76c63af 100644
--- a/hw/xen/xen-operations.c
+++ b/hw/xen/xen-operations.c
@@ -10,6 +10,7 @@
  */
 
 #include "qemu/osdep.h"
+#include "qemu/uuid.h"
 #include "qapi/error.h"
 
 #include "hw/xen/xen_backend_ops.h"
@@ -280,6 +281,193 @@ struct foreignmem_backend_ops libxenforeignmem_backend_ops = {
     .unmap = libxenforeignmem_backend_unmap,
 };
 
+struct qemu_xs_handle {
+    struct xs_handle *xsh;
+    NotifierList notifiers;
+};
+
+static void watch_event(void *opaque)
+{
+    struct qemu_xs_handle *h = opaque;
+
+    for (;;) {
+        char **v = xs_check_watch(h->xsh);
+
+        if (!v) {
+            break;
+        }
+
+        notifier_list_notify(&h->notifiers, v);
+        free(v);
+    }
+}
+
+static struct qemu_xs_handle *libxenstore_open(void)
+{
+    struct xs_handle *xsh = xs_open(0);
+    struct qemu_xs_handle *h = g_new0(struct qemu_xs_handle, 1);
+
+    if (!xsh)
+        return NULL;
+
+    h = g_new0(struct qemu_xs_handle, 1);
+    h->xsh = xsh;
+
+    notifier_list_init(&h->notifiers);
+    qemu_set_fd_handler(xs_fileno(h->xsh), watch_event, NULL, h);
+
+    return h;
+}
+
+static void libxenstore_close(struct qemu_xs_handle *h)
+{
+    g_assert(notifier_list_empty(&h->notifiers));
+    qemu_set_fd_handler(xs_fileno(h->xsh), NULL, NULL, NULL);
+    xs_close(h->xsh);
+    g_free(h);
+}
+
+static char *libxenstore_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
+{
+    return xs_get_domain_path(h->xsh, domid);
+}
+
+static char **libxenstore_directory(struct qemu_xs_handle *h, xs_transaction_t t,
+                             const char *path, unsigned int *num)
+{
+    return xs_directory(h->xsh, t, path, num);
+}
+
+static void *libxenstore_read(struct qemu_xs_handle *h, xs_transaction_t t,
+                              const char *path, unsigned int *len)
+{
+    return xs_read(h->xsh, t, path, len);
+}
+
+static bool libxenstore_write(struct qemu_xs_handle *h, xs_transaction_t t,
+                              const char *path, const void *data, unsigned int len)
+{
+    return xs_write(h->xsh, t, path, data, len);
+}
+
+static bool libxenstore_create(struct qemu_xs_handle *h, xs_transaction_t t,
+                               unsigned int owner, unsigned int domid,
+                               unsigned int perms, const char *path)
+{
+    struct xs_permissions perms_list[] = {
+        {
+            .id    = owner,
+            .perms = XS_PERM_NONE,
+        },
+        {
+            .id    = domid,
+            .perms = perms,
+        },
+    };
+
+    if (!xs_mkdir(h->xsh, t, path))
+        return false;
+
+    return xs_set_permissions(h->xsh, t, path, perms_list, ARRAY_SIZE(perms_list));
+}
+
+static bool libxenstore_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
+                                const char *path)
+{
+    return xs_rm(h->xsh, t, path);
+}
+
+struct qemu_xs_watch {
+    char *path;
+    char *token;
+    xs_watch_fn fn;
+    void *opaque;
+    Notifier notifier;
+};
+
+static void watch_notify(Notifier *n, void *data)
+{
+    struct qemu_xs_watch *w = container_of(n, struct qemu_xs_watch, notifier);
+    const char **v = data;
+
+    if (!strcmp(w->token, v[XS_WATCH_TOKEN])) {
+        w->fn(w->opaque, v[XS_WATCH_PATH]);
+    }
+}
+
+static struct qemu_xs_watch *new_watch(const char *path, xs_watch_fn fn, void *opaque)
+{
+    struct qemu_xs_watch *w = g_new0(struct qemu_xs_watch, 1);
+    QemuUUID uuid;
+
+    qemu_uuid_generate(&uuid);
+
+    w->token = qemu_uuid_unparse_strdup(&uuid);
+    w->path = g_strdup(path);
+    w->fn = fn;
+    w->opaque = opaque;
+    w->notifier.notify = watch_notify;
+
+    return w;
+}
+
+static void free_watch(struct qemu_xs_watch *w)
+{
+    g_free(w->token);
+    g_free(w->path);
+
+    g_free(w);
+}
+
+static struct qemu_xs_watch *libxenstore_watch(struct qemu_xs_handle *h, const char *path,
+                                               xs_watch_fn fn, void *opaque)
+{
+    struct qemu_xs_watch *w = new_watch(path, fn, opaque);
+
+    notifier_list_add(&h->notifiers, &w->notifier);
+
+    if (!xs_watch(h->xsh, path, w->token)) {
+        notifier_remove(&w->notifier);
+        free_watch(w);
+        return NULL;
+    }
+
+    return w;
+}
+
+static void libxenstore_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
+{
+    xs_unwatch(h->xsh, w->path, w->token);
+    notifier_remove(&w->notifier);
+    free_watch(w);
+}
+
+static xs_transaction_t libxenstore_transaction_start(struct qemu_xs_handle *h)
+{
+    return xs_transaction_start(h->xsh);
+}
+
+static bool libxenstore_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
+                                        bool abort)
+{
+    return xs_transaction_end(h->xsh, t, abort);
+}
+
+struct xenstore_backend_ops libxenstore_backend_ops = {
+    .open = libxenstore_open,
+    .close = libxenstore_close,
+    .get_domain_path = libxenstore_get_domain_path,
+    .directory = libxenstore_directory,
+    .read = libxenstore_read,
+    .write = libxenstore_write,
+    .create = libxenstore_create,
+    .destroy = libxenstore_destroy,
+    .watch = libxenstore_watch,
+    .unwatch = libxenstore_unwatch,
+    .transaction_start = libxenstore_transaction_start,
+    .transaction_end = libxenstore_transaction_end,
+};
+
 void setup_xen_backend_ops(void)
 {
 #if CONFIG_XEN_CTRL_INTERFACE_VERSION >= 40800
@@ -295,4 +483,5 @@ void setup_xen_backend_ops(void)
     xen_evtchn_ops = &libxenevtchn_backend_ops;
     xen_gnttab_ops = &libxengnttab_backend_ops;
     xen_foreignmem_ops = &libxenforeignmem_backend_ops;
+    xen_xenstore_ops = &libxenstore_backend_ops;
 }
diff --git a/hw/xen/xen_devconfig.c b/hw/xen/xen_devconfig.c
index 46ee4a7f02..9b7304e544 100644
--- a/hw/xen/xen_devconfig.c
+++ b/hw/xen/xen_devconfig.c
@@ -11,11 +11,11 @@ static int xen_config_dev_dirs(const char *ftype, const char *btype, int vdev,
 {
     char *dom;
 
-    dom = xs_get_domain_path(xenstore, xen_domid);
+    dom = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
     snprintf(fe, len, "%s/device/%s/%d", dom, ftype, vdev);
     free(dom);
 
-    dom = xs_get_domain_path(xenstore, 0);
+    dom = qemu_xen_xs_get_domain_path(xenstore, 0);
     snprintf(be, len, "%s/backend/%s/%d/%d", dom, btype, xen_domid, vdev);
     free(dom);
 
diff --git a/hw/xen/xen_pt_graphics.c b/hw/xen/xen_pt_graphics.c
index f303f67c9c..0aed3bb6fd 100644
--- a/hw/xen/xen_pt_graphics.c
+++ b/hw/xen/xen_pt_graphics.c
@@ -5,7 +5,6 @@
 #include "qapi/error.h"
 #include "xen_pt.h"
 #include "xen-host-pci-device.h"
-#include "hw/xen/xen-legacy-backend.h"
 
 static unsigned long igd_guest_opregion;
 static unsigned long igd_host_opregion;
diff --git a/hw/xen/xen_pvdev.c b/hw/xen/xen_pvdev.c
index d8582cc74c..be1504b82c 100644
--- a/hw/xen/xen_pvdev.c
+++ b/hw/xen/xen_pvdev.c
@@ -54,31 +54,17 @@ void xen_config_cleanup(void)
     struct xs_dirs *d;
 
     QTAILQ_FOREACH(d, &xs_cleanup, list) {
-        xs_rm(xenstore, 0, d->xs_dir);
+        qemu_xen_xs_destroy(xenstore, 0, d->xs_dir);
     }
 }
 
 int xenstore_mkdir(char *path, int p)
 {
-    struct xs_permissions perms[2] = {
-        {
-            .id    = 0, /* set owner: dom0 */
-        }, {
-            .id    = xen_domid,
-            .perms = p,
-        }
-    };
-
-    if (!xs_mkdir(xenstore, 0, path)) {
+    if (!qemu_xen_xs_create(xenstore, 0, 0, xen_domid, p, path)) {
         xen_pv_printf(NULL, 0, "xs_mkdir %s: failed\n", path);
         return -1;
     }
     xenstore_cleanup_dir(g_strdup(path));
-
-    if (!xs_set_permissions(xenstore, 0, path, perms, 2)) {
-        xen_pv_printf(NULL, 0, "xs_set_permissions %s: failed\n", path);
-        return -1;
-    }
     return 0;
 }
 
@@ -87,7 +73,7 @@ int xenstore_write_str(const char *base, const char *node, const char *val)
     char abspath[XEN_BUFSIZE];
 
     snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
-    if (!xs_write(xenstore, 0, abspath, val, strlen(val))) {
+    if (!qemu_xen_xs_write(xenstore, 0, abspath, val, strlen(val))) {
         return -1;
     }
     return 0;
@@ -100,7 +86,7 @@ char *xenstore_read_str(const char *base, const char *node)
     char *str, *ret = NULL;
 
     snprintf(abspath, sizeof(abspath), "%s/%s", base, node);
-    str = xs_read(xenstore, 0, abspath, &len);
+    str = qemu_xen_xs_read(xenstore, 0, abspath, &len);
     if (str != NULL) {
         /* move to qemu-allocated memory to make sure
          * callers can savely g_free() stuff. */
@@ -152,29 +138,6 @@ int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval)
     return rc;
 }
 
-void xenstore_update(void *unused)
-{
-    char **vec = NULL;
-    intptr_t type, ops, ptr;
-    unsigned int dom, count;
-
-    vec = xs_read_watch(xenstore, &count);
-    if (vec == NULL) {
-        goto cleanup;
-    }
-
-    if (sscanf(vec[XS_WATCH_TOKEN], "be:%" PRIxPTR ":%d:%" PRIxPTR,
-               &type, &dom, &ops) == 3) {
-        xenstore_update_be(vec[XS_WATCH_PATH], (void *)type, dom, (void*)ops);
-    }
-    if (sscanf(vec[XS_WATCH_TOKEN], "fe:%" PRIxPTR, &ptr) == 1) {
-        xenstore_update_fe(vec[XS_WATCH_PATH], (void *)ptr);
-    }
-
-cleanup:
-    free(vec);
-}
-
 const char *xenbus_strstate(enum xenbus_state state)
 {
     static const char *const name[] = {
@@ -299,9 +262,7 @@ void xen_pv_del_xendev(struct XenLegacyDevice *xendev)
     }
 
     if (xendev->fe) {
-        char token[XEN_BUFSIZE];
-        snprintf(token, sizeof(token), "fe:%p", xendev);
-        xs_unwatch(xenstore, xendev->fe, token);
+        qemu_xen_xs_unwatch(xenstore, xendev->watch);
         g_free(xendev->fe);
     }
 
diff --git a/include/hw/xen/xen-bus-helper.h b/include/hw/xen/xen-bus-helper.h
index 8782f30550..c5b083e4ab 100644
--- a/include/hw/xen/xen-bus-helper.h
+++ b/include/hw/xen/xen-bus-helper.h
@@ -8,40 +8,39 @@
 #ifndef HW_XEN_BUS_HELPER_H
 #define HW_XEN_BUS_HELPER_H
 
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_backend_ops.h"
 
 const char *xs_strstate(enum xenbus_state state);
 
-void xs_node_create(struct xs_handle *xsh,  xs_transaction_t tid,
-                    const char *node, struct xs_permissions perms[],
-                    unsigned int nr_perms, Error **errp);
-void xs_node_destroy(struct xs_handle *xsh,  xs_transaction_t tid,
+void xs_node_create(struct qemu_xs_handle *h,  xs_transaction_t tid,
+                    const char *node, unsigned int owner, unsigned int domid,
+                    unsigned int perms, Error **errp);
+void xs_node_destroy(struct qemu_xs_handle *h,  xs_transaction_t tid,
                      const char *node, Error **errp);
 
 /* Write to node/key unless node is empty, in which case write to key */
-void xs_node_vprintf(struct xs_handle *xsh,  xs_transaction_t tid,
+void xs_node_vprintf(struct qemu_xs_handle *h,  xs_transaction_t tid,
                      const char *node, const char *key, Error **errp,
                      const char *fmt, va_list ap)
     G_GNUC_PRINTF(6, 0);
-void xs_node_printf(struct xs_handle *xsh,  xs_transaction_t tid,
+void xs_node_printf(struct qemu_xs_handle *h,  xs_transaction_t tid,
                     const char *node, const char *key, Error **errp,
                     const char *fmt, ...)
     G_GNUC_PRINTF(6, 7);
 
 /* Read from node/key unless node is empty, in which case read from key */
-int xs_node_vscanf(struct xs_handle *xsh,  xs_transaction_t tid,
+int xs_node_vscanf(struct qemu_xs_handle *h,  xs_transaction_t tid,
                    const char *node, const char *key, Error **errp,
                    const char *fmt, va_list ap)
     G_GNUC_SCANF(6, 0);
-int xs_node_scanf(struct xs_handle *xsh,  xs_transaction_t tid,
+int xs_node_scanf(struct qemu_xs_handle *h,  xs_transaction_t tid,
                   const char *node, const char *key, Error **errp,
                   const char *fmt, ...)
     G_GNUC_SCANF(6, 7);
 
 /* Watch node/key unless node is empty, in which case watch key */
-void xs_node_watch(struct xs_handle *xsh, const char *node, const char *key,
-                   char *token, Error **errp);
-void xs_node_unwatch(struct xs_handle *xsh, const char *node, const char *key,
-                     const char *token, Error **errp);
+struct qemu_xs_watch *xs_node_watch(struct qemu_xs_handle *h, const char *node, const char *key,
+                                    xs_watch_fn fn, void *opaque, Error **errp);
+void xs_node_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w);
 
 #endif /* HW_XEN_BUS_HELPER_H */
diff --git a/include/hw/xen/xen-bus.h b/include/hw/xen/xen-bus.h
index 5a90e79d53..f435898164 100644
--- a/include/hw/xen/xen-bus.h
+++ b/include/hw/xen/xen-bus.h
@@ -9,29 +9,23 @@
 #define HW_XEN_BUS_H
 
 #include "hw/xen/xen_backend_ops.h"
-#include "hw/xen/interface/io/xenbus.h"
 #include "hw/sysbus.h"
 #include "qemu/notify.h"
 #include "qom/object.h"
 
-typedef void (*XenWatchHandler)(void *opaque);
-
-typedef struct XenWatchList XenWatchList;
-typedef struct XenWatch XenWatch;
 typedef struct XenEventChannel XenEventChannel;
 
 struct XenDevice {
     DeviceState qdev;
     domid_t frontend_id;
     char *name;
-    struct xs_handle *xsh;
-    XenWatchList *watch_list;
+    struct qemu_xs_handle *xsh;
     char *backend_path, *frontend_path;
     enum xenbus_state backend_state, frontend_state;
     Notifier exit;
-    XenWatch *backend_state_watch, *frontend_state_watch;
+    struct qemu_xs_watch *backend_state_watch, *frontend_state_watch;
     bool backend_online;
-    XenWatch *backend_online_watch;
+    struct qemu_xs_watch *backend_online_watch;
     xengnttab_handle *xgth;
     bool inactive;
     QLIST_HEAD(, XenEventChannel) event_channels;
@@ -64,10 +58,9 @@ OBJECT_DECLARE_TYPE(XenDevice, XenDeviceClass, XEN_DEVICE)
 struct XenBus {
     BusState qbus;
     domid_t backend_id;
-    struct xs_handle *xsh;
-    XenWatchList *watch_list;
+    struct qemu_xs_handle *xsh;
     unsigned int backend_types;
-    XenWatch **backend_watch;
+    struct qemu_xs_watch **backend_watch;
     QLIST_HEAD(, XenDevice) inactive_devices;
 };
 
diff --git a/include/hw/xen/xen-legacy-backend.h b/include/hw/xen/xen-legacy-backend.h
index ea84a9246f..6c2ae709f5 100644
--- a/include/hw/xen/xen-legacy-backend.h
+++ b/include/hw/xen/xen-legacy-backend.h
@@ -2,7 +2,6 @@
 #define HW_XEN_LEGACY_BACKEND_H
 
 #include "hw/xen/xen_backend_ops.h"
-#include "hw/xen/interface/io/xenbus.h"
 #include "hw/xen/xen_pvdev.h"
 #include "net/net.h"
 #include "qom/object.h"
@@ -16,7 +15,7 @@ DECLARE_INSTANCE_CHECKER(XenLegacyDevice, XENBACKEND,
                          TYPE_XENBACKEND)
 
 /* variables */
-extern struct xs_handle *xenstore;
+extern struct qemu_xs_handle *xenstore;
 extern const char *xen_protocol;
 extern DeviceState *xen_sysdev;
 extern BusState *xen_sysbus;
@@ -31,9 +30,6 @@ int xenstore_write_be_int64(struct XenLegacyDevice *xendev, const char *node,
 char *xenstore_read_be_str(struct XenLegacyDevice *xendev, const char *node);
 int xenstore_read_be_int(struct XenLegacyDevice *xendev, const char *node,
                          int *ival);
-void xenstore_update_fe(char *watch, struct XenLegacyDevice *xendev);
-void xenstore_update_be(char *watch, char *type, int dom,
-                        struct XenDevOps *ops);
 char *xenstore_read_fe_str(struct XenLegacyDevice *xendev, const char *node);
 int xenstore_read_fe_int(struct XenLegacyDevice *xendev, const char *node,
                          int *ival);
diff --git a/include/hw/xen/xen_backend_ops.h b/include/hw/xen/xen_backend_ops.h
index 5bbe7bbc01..ffb5e54e80 100644
--- a/include/hw/xen/xen_backend_ops.h
+++ b/include/hw/xen/xen_backend_ops.h
@@ -12,6 +12,10 @@
 #ifndef QEMU_XEN_BACKEND_OPS_H
 #define QEMU_XEN_BACKEND_OPS_H
 
+#include "hw/xen/xen.h"
+#include "standard-headers/xen/xen.h"
+#include "standard-headers/xen/io/xenbus.h"
+
 /*
  * For the time being, these operations map fairly closely to the API of
  * the actual Xen libraries, e.g. libxenevtchn. As we complete the migration
@@ -36,6 +40,16 @@ typedef uint32_t grant_ref_t;
 #define XEN_PAGE_SIZE        (1UL << XEN_PAGE_SHIFT)
 #define XEN_PAGE_MASK        (~(XEN_PAGE_SIZE-1))
 
+#ifndef xen_rmb
+#define xen_rmb() smp_rmb()
+#endif
+#ifndef xen_wmb
+#define xen_wmb() smp_wmb()
+#endif
+#ifndef xen_mb
+#define xen_mb() smp_mb()
+#endif
+
 struct evtchn_backend_ops {
     xenevtchn_handle *(*open)(void);
     int (*bind_interdomain)(xenevtchn_handle *xc, uint32_t domid,
@@ -233,6 +247,147 @@ static inline int qemu_xen_foreignmem_unmap(void *addr, size_t pages)
     return xen_foreignmem_ops->unmap(addr, pages);
 }
 
+typedef void (*xs_watch_fn)(void *opaque, const char *path);
+
+struct qemu_xs_handle;
+struct qemu_xs_watch;
+typedef uint32_t xs_transaction_t;
+
+#define XBT_NULL 0
+
+#define XS_PERM_NONE  0x00
+#define XS_PERM_READ  0x01
+#define XS_PERM_WRITE 0x02
+
+struct xenstore_backend_ops {
+    struct qemu_xs_handle *(*open)(void);
+    void (*close)(struct qemu_xs_handle *h);
+    char *(*get_domain_path)(struct qemu_xs_handle *h, unsigned int domid);
+    char **(*directory)(struct qemu_xs_handle *h, xs_transaction_t t,
+                        const char *path, unsigned int *num);
+    void *(*read)(struct qemu_xs_handle *h, xs_transaction_t t,
+                  const char *path, unsigned int *len);
+    bool (*write)(struct qemu_xs_handle *h, xs_transaction_t t,
+                  const char *path, const void *data, unsigned int len);
+    bool (*create)(struct qemu_xs_handle *h, xs_transaction_t t,
+                   unsigned int owner, unsigned int domid,
+                   unsigned int perms, const char *path);
+    bool (*destroy)(struct qemu_xs_handle *h, xs_transaction_t t,
+               const char *path);
+    struct qemu_xs_watch *(*watch)(struct qemu_xs_handle *h, const char *path,
+                                   xs_watch_fn fn, void *opaque);
+    void (*unwatch)(struct qemu_xs_handle *h, struct qemu_xs_watch *w);
+    xs_transaction_t (*transaction_start)(struct qemu_xs_handle *h);
+    bool (*transaction_end)(struct qemu_xs_handle *h, xs_transaction_t t,
+                            bool abort);
+};
+
+extern struct xenstore_backend_ops *xen_xenstore_ops;
+
+static inline struct qemu_xs_handle *qemu_xen_xs_open(void)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->open();
+}
+
+static inline void qemu_xen_xs_close(struct qemu_xs_handle *h)
+{
+    if (!xen_xenstore_ops) {
+        return;
+    }
+    xen_xenstore_ops->close(h);
+}
+
+static inline char *qemu_xen_xs_get_domain_path(struct qemu_xs_handle *h, unsigned int domid)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->get_domain_path(h, domid);
+}
+
+static inline char **qemu_xen_xs_directory(struct qemu_xs_handle *h, xs_transaction_t t,
+                                           const char *path, unsigned int *num)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->directory(h, t, path, num);
+}
+
+static inline void *qemu_xen_xs_read(struct qemu_xs_handle *h, xs_transaction_t t,
+                                     const char *path, unsigned int *len)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->read(h, t, path, len);
+}
+
+static inline bool qemu_xen_xs_write(struct qemu_xs_handle *h, xs_transaction_t t,
+                                     const char *path, const void *data, unsigned int len)
+{
+    if (!xen_xenstore_ops) {
+        return false;
+    }
+    return xen_xenstore_ops->write(h, t, path, data, len);
+}
+
+static inline bool qemu_xen_xs_create(struct qemu_xs_handle *h, xs_transaction_t t,
+                                      unsigned int owner, unsigned int domid,
+                                      unsigned int perms, const char *path)
+{
+    if (!xen_xenstore_ops) {
+        return false;
+    }
+    return xen_xenstore_ops->create(h, t, owner, domid, perms, path);
+}
+
+static inline bool qemu_xen_xs_destroy(struct qemu_xs_handle *h, xs_transaction_t t,
+                                       const char *path)
+{
+    if (!xen_xenstore_ops) {
+        return false;
+    }
+    return xen_xenstore_ops->destroy(h, t, path);
+}
+
+static inline struct qemu_xs_watch *qemu_xen_xs_watch(struct qemu_xs_handle *h, const char *path,
+                                                      xs_watch_fn fn, void *opaque)
+{
+    if (!xen_xenstore_ops) {
+        return NULL;
+    }
+    return xen_xenstore_ops->watch(h, path, fn, opaque);
+}
+
+static inline void qemu_xen_xs_unwatch(struct qemu_xs_handle *h, struct qemu_xs_watch *w)
+{
+    if (!xen_xenstore_ops) {
+        return;
+    }
+    xen_xenstore_ops->unwatch(h, w);
+}
+
+static inline xs_transaction_t qemu_xen_xs_transaction_start(struct qemu_xs_handle *h)
+{
+    if (!xen_xenstore_ops) {
+        return XBT_NULL;
+    }
+    return xen_xenstore_ops->transaction_start(h);
+}
+
+static inline bool qemu_xen_xs_transaction_end(struct qemu_xs_handle *h, xs_transaction_t t,
+                                               bool abort)
+{
+    if (!xen_xenstore_ops) {
+        return false;
+    }
+    return xen_xenstore_ops->transaction_end(h, t, abort);
+}
+
 void setup_xen_backend_ops(void);
 
 #endif /* QEMU_XEN_BACKEND_OPS_H */
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_common.h
index 632ce617cc..7edcf3eb25 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_common.h
@@ -12,7 +12,6 @@
 
 #include <xenctrl.h>
 #include <xenstore.h>
-#include "hw/xen/interface/io/xenbus.h"
 
 #include "hw/xen/xen.h"
 #include "hw/pci/pci_device.h"
diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h
index 9c27b14764..d8eea353b8 100644
--- a/include/hw/xen/xen_pvdev.h
+++ b/include/hw/xen/xen_pvdev.h
@@ -39,6 +39,7 @@ struct XenLegacyDevice {
     char               name[64];
     int                debug;
 
+    struct qemu_xs_watch *watch;
     enum xenbus_state  be_state;
     enum xenbus_state  fe_state;
     int                online;
@@ -64,7 +65,6 @@ int xenstore_write_int64(const char *base, const char *node, int64_t ival);
 char *xenstore_read_str(const char *base, const char *node);
 int xenstore_read_int(const char *base, const char *node, int *ival);
 int xenstore_read_uint64(const char *base, const char *node, uint64_t *uval);
-void xenstore_update(void *unused);
 
 const char *xenbus_strstate(enum xenbus_state state);
 
diff --git a/softmmu/globals.c b/softmmu/globals.c
index dda32986f7..39678aa8c5 100644
--- a/softmmu/globals.c
+++ b/softmmu/globals.c
@@ -68,3 +68,4 @@ bool xen_domid_restrict;
 struct evtchn_backend_ops *xen_evtchn_ops;
 struct gnttab_backend_ops *xen_gnttab_ops;
 struct foreignmem_backend_ops *xen_foreignmem_ops;
+struct xenstore_backend_ops *xen_xenstore_ops;
-- 
2.39.0
[RFC PATCH v7bis 07/19] hw/xen: Move xenstore_store_pv_console_info to xen_console.c
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

There's no need for this to be in the Xen accel code, and as we want to
use the Xen console support with KVM-emulated Xen we'll want to have a
platform-agnostic version of it. Make it use GString to build up the
path while we're at it.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 accel/xen/xen-all.c   | 61 -------------------------------------------
 hw/char/xen_console.c | 45 +++++++++++++++++++++++++++++--
 include/hw/xen/xen.h  |  2 --
 3 files changed, 43 insertions(+), 65 deletions(-)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 6df7b4ff34..514bc9eea4 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -39,67 +39,6 @@ xc_interface *xen_xc;
 xenforeignmemory_handle *xen_fmem;
 xendevicemodel_handle *xen_dmod;
 
-static int store_dev_info(int domid, Chardev *cs, const char *string)
-{
-    struct xs_handle *xs = NULL;
-    char *path = NULL;
-    char *newpath = NULL;
-    char *pts = NULL;
-    int ret = -1;
-
-    /* Only continue if we're talking to a pty. */
-    if (!CHARDEV_IS_PTY(cs)) {
-        return 0;
-    }
-    pts = cs->filename + 4;
-
-    /* We now have everything we need to set the xenstore entry. */
-    xs = xs_open(0);
-    if (xs == NULL) {
-        fprintf(stderr, "Could not contact XenStore\n");
-        goto out;
-    }
-
-    path = xs_get_domain_path(xs, domid);
-    if (path == NULL) {
-        fprintf(stderr, "xs_get_domain_path() error\n");
-        goto out;
-    }
-    newpath = realloc(path, (strlen(path) + strlen(string) +
-                strlen("/tty") + 1));
-    if (newpath == NULL) {
-        fprintf(stderr, "realloc error\n");
-        goto out;
-    }
-    path = newpath;
-
-    strcat(path, string);
-    strcat(path, "/tty");
-    if (!xs_write(xs, XBT_NULL, path, pts, strlen(pts))) {
-        fprintf(stderr, "xs_write for '%s' fail", string);
-        goto out;
-    }
-    ret = 0;
-
-out:
-    free(path);
-    xs_close(xs);
-
-    return ret;
-}
-
-void xenstore_store_pv_console_info(int i, Chardev *chr)
-{
-    if (i == 0) {
-        store_dev_info(xen_domid, chr, "/console");
-    } else {
-        char buf[32];
-        snprintf(buf, sizeof(buf), "/device/console/%d", i);
-        store_dev_info(xen_domid, chr, buf);
-    }
-}
-
-
 static void xenstore_record_dm_state(const char *state)
 {
     struct xs_handle *xs;
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index ad8638a86d..c7a19c0e7c 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -173,6 +173,48 @@ static void xencons_send(struct XenConsole *con)
 
 /* -------------------------------------------------------------------- */
 
+static int store_con_info(struct XenConsole *con)
+{
+    Chardev *cs = qemu_chr_fe_get_driver(&con->chr);
+    char *pts = NULL;
+    char *dom_path;
+    GString *path;
+    int ret = -1;
+
+    /* Only continue if we're talking to a pty. */
+    if (!CHARDEV_IS_PTY(cs)) {
+        return 0;
+    }
+    pts = cs->filename + 4;
+
+    dom_path = qemu_xen_xs_get_domain_path(xenstore, xen_domid);
+    if (!dom_path) {
+        return 0;
+    }
+
+    path = g_string_new(dom_path);
+    free(dom_path);
+
+    if (con->xendev.dev) {
+        g_string_append_printf(path, "/device/console/%d", con->xendev.dev);
+    } else {
+        g_string_append(path, "/console");
+    }
+    g_string_append(path, "/tty");
+
+    if (xenstore_write_str(con->console, path->str, pts)) {
+        fprintf(stderr, "xenstore_write_str for '%s' fail", path->str);
+        goto out;
+    }
+    ret = 0;
+
+out:
+    g_string_free(path, true);
+    free(path);
+
+    return ret;
+}
+
 static int con_init(struct XenLegacyDevice *xendev)
 {
     struct XenConsole *con = container_of(xendev, struct XenConsole, xendev);
@@ -215,8 +257,7 @@ static int con_init(struct XenLegacyDevice *xendev)
                          &error_abort);
     }
 
-    xenstore_store_pv_console_info(con->xendev.dev,
-                                   qemu_chr_fe_get_driver(&con->chr));
+    store_con_info(con);
 
 out:
     g_free(type);
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index b3873c581b..fcc6c5b522 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -39,8 +39,6 @@ int xen_is_pirq_msi(uint32_t msi_data);
 
 qemu_irq *xen_interrupt_controller_init(void);
 
-void xenstore_store_pv_console_info(int i, Chardev *chr);
-
 void xen_register_framebuffer(struct MemoryRegion *mr);
 
 #endif /* QEMU_HW_XEN_H */
-- 
2.39.0
[RFC PATCH v7bis 08/19] hw/xen: Use XEN_PAGE_SIZE in PV backend drivers
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

XC_PAGE_SIZE comes from the actual Xen libraries, while XEN_PAGE_SIZE is
provided by QEMU itself in xen_backend_ops.h. For backends which may be
built for emulation mode, use the latter.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/block/dataplane/xen-block.c |  8 ++++----
 hw/display/xenfb.c             | 12 ++++++------
 hw/net/xen_nic.c               | 12 ++++++------
 hw/usb/xen-usb.c               |  8 ++++----
 4 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index e55b713002..8322a1de82 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -101,9 +101,9 @@ static XenBlockRequest *xen_block_start_request(XenBlockDataPlane *dataplane)
          * re-use requests, allocate the memory once here. It will be freed
          * xen_block_dataplane_destroy() when the request list is freed.
          */
-        request->buf = qemu_memalign(XC_PAGE_SIZE,
+        request->buf = qemu_memalign(XEN_PAGE_SIZE,
                                      BLKIF_MAX_SEGMENTS_PER_REQUEST *
-                                     XC_PAGE_SIZE);
+                                     XEN_PAGE_SIZE);
         dataplane->requests_total++;
         qemu_iovec_init(&request->v, 1);
     } else {
@@ -185,7 +185,7 @@ static int xen_block_parse_request(XenBlockRequest *request)
             goto err;
         }
         if (request->req.seg[i].last_sect * dataplane->sector_size >=
-            XC_PAGE_SIZE) {
+            XEN_PAGE_SIZE) {
             error_report("error: page crossing");
             goto err;
         }
@@ -740,7 +740,7 @@ void xen_block_dataplane_start(XenBlockDataPlane *dataplane,
 
     dataplane->protocol = protocol;
 
-    ring_size = XC_PAGE_SIZE * dataplane->nr_ring_ref;
+    ring_size = XEN_PAGE_SIZE * dataplane->nr_ring_ref;
     switch (dataplane->protocol) {
     case BLKIF_PROTOCOL_NATIVE:
     {
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 2c4016fcbd..0074a9b6f8 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -489,13 +489,13 @@ static int xenfb_map_fb(struct XenFB *xenfb)
     }
 
     if (xenfb->pixels) {
-        munmap(xenfb->pixels, xenfb->fbpages * XC_PAGE_SIZE);
+        munmap(xenfb->pixels, xenfb->fbpages * XEN_PAGE_SIZE);
         xenfb->pixels = NULL;
     }
 
-    xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XC_PAGE_SIZE);
+    xenfb->fbpages = DIV_ROUND_UP(xenfb->fb_len, XEN_PAGE_SIZE);
     n_fbdirs = xenfb->fbpages * mode / 8;
-    n_fbdirs = DIV_ROUND_UP(n_fbdirs, XC_PAGE_SIZE);
+    n_fbdirs = DIV_ROUND_UP(n_fbdirs, XEN_PAGE_SIZE);
 
     pgmfns = g_new0(xen_pfn_t, n_fbdirs);
     fbmfns = g_new0(xen_pfn_t, xenfb->fbpages);
@@ -528,8 +528,8 @@ static int xenfb_configure_fb(struct XenFB *xenfb, size_t fb_len_lim,
 {
     size_t mfn_sz = sizeof_field(struct xenfb_page, pd[0]);
     size_t pd_len = sizeof_field(struct xenfb_page, pd) / mfn_sz;
-    size_t fb_pages = pd_len * XC_PAGE_SIZE / mfn_sz;
-    size_t fb_len_max = fb_pages * XC_PAGE_SIZE;
+    size_t fb_pages = pd_len * XEN_PAGE_SIZE / mfn_sz;
+    size_t fb_len_max = fb_pages * XEN_PAGE_SIZE;
     int max_width, max_height;
 
     if (fb_len_lim > fb_len_max) {
@@ -930,7 +930,7 @@ static void fb_disconnect(struct XenLegacyDevice *xendev)
      *   instead.  This releases the guest pages and keeps qemu happy.
      */
     qemu_xen_foreignmem_unmap(fb->pixels, fb->fbpages);
-    fb->pixels = mmap(fb->pixels, fb->fbpages * XC_PAGE_SIZE,
+    fb->pixels = mmap(fb->pixels, fb->fbpages * XEN_PAGE_SIZE,
                       PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANON,
                       -1, 0);
     if (fb->pixels == MAP_FAILED) {
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 166d03787d..9bbf6599fc 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -145,7 +145,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
                 continue;
             }
 
-            if ((txreq.offset + txreq.size) > XC_PAGE_SIZE) {
+            if ((txreq.offset + txreq.size) > XEN_PAGE_SIZE) {
                 xen_pv_printf(&netdev->xendev, 0, "error: page crossing\n");
                 net_tx_error(netdev, &txreq, rc);
                 continue;
@@ -171,7 +171,7 @@ static void net_tx_packets(struct XenNetDev *netdev)
             if (txreq.flags & NETTXF_csum_blank) {
                 /* have read-only mapping -> can't fill checksum in-place */
                 if (!tmpbuf) {
-                    tmpbuf = g_malloc(XC_PAGE_SIZE);
+                    tmpbuf = g_malloc(XEN_PAGE_SIZE);
                 }
                 memcpy(tmpbuf, page + txreq.offset, txreq.size);
                 net_checksum_calculate(tmpbuf, txreq.size, CSUM_ALL);
@@ -243,9 +243,9 @@ static ssize_t net_rx_packet(NetClientState *nc, const uint8_t *buf, size_t size
     if (rc == rp || RING_REQUEST_CONS_OVERFLOW(&netdev->rx_ring, rc)) {
         return 0;
     }
-    if (size > XC_PAGE_SIZE - NET_IP_ALIGN) {
+    if (size > XEN_PAGE_SIZE - NET_IP_ALIGN) {
         xen_pv_printf(&netdev->xendev, 0, "packet too big (%lu > %ld)",
-                      (unsigned long)size, XC_PAGE_SIZE - NET_IP_ALIGN);
+                      (unsigned long)size, XEN_PAGE_SIZE - NET_IP_ALIGN);
         return -1;
     }
 
@@ -348,8 +348,8 @@ static int net_connect(struct XenLegacyDevice *xendev)
         netdev->txs = NULL;
         return -1;
     }
-    BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XC_PAGE_SIZE);
-    BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XC_PAGE_SIZE);
+    BACK_RING_INIT(&netdev->tx_ring, netdev->txs, XEN_PAGE_SIZE);
+    BACK_RING_INIT(&netdev->rx_ring, netdev->rxs, XEN_PAGE_SIZE);
 
     xen_be_bind_evtchn(&netdev->xendev);
 
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index a770a64cb4..66cb3f7c24 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -161,7 +161,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req)
 
     for (i = 0; i < nr_segs; i++) {
         if ((unsigned)usbback_req->req.seg[i].offset +
-            (unsigned)usbback_req->req.seg[i].length > XC_PAGE_SIZE) {
+            (unsigned)usbback_req->req.seg[i].length > XEN_PAGE_SIZE) {
             xen_pv_printf(xendev, 0, "segment crosses page boundary\n");
             return -EINVAL;
         }
@@ -185,7 +185,7 @@ static int usbback_gnttab_map(struct usbback_req *usbback_req)
 
         for (i = 0; i < usbback_req->nr_buffer_segs; i++) {
             seg = usbback_req->req.seg + i;
-            addr = usbback_req->buffer + i * XC_PAGE_SIZE + seg->offset;
+            addr = usbback_req->buffer + i * XEN_PAGE_SIZE + seg->offset;
             qemu_iovec_add(&usbback_req->packet.iov, addr, seg->length);
         }
     }
@@ -902,8 +902,8 @@ static int usbback_connect(struct XenLegacyDevice *xendev)
     usbif->conn_ring_ref = conn_ring_ref;
     urb_sring = usbif->urb_sring;
     conn_sring = usbif->conn_sring;
-    BACK_RING_INIT(&usbif->urb_ring, urb_sring, XC_PAGE_SIZE);
-    BACK_RING_INIT(&usbif->conn_ring, conn_sring, XC_PAGE_SIZE);
+    BACK_RING_INIT(&usbif->urb_ring, urb_sring, XEN_PAGE_SIZE);
+    BACK_RING_INIT(&usbif->conn_ring, conn_sring, XEN_PAGE_SIZE);
 
     xen_be_bind_evtchn(xendev);
 
-- 
2.39.0
[RFC PATCH v7bis 09/19] hw/xen: Rename xen_common.h to xen_native.h
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

This header is now only for native Xen code, not PV backends that may be
used in Xen emulation. Since the toolstack libraries may depend on the
specific version of Xen headers that they pull in (and will set the
__XEN_TOOLS__ macro to enable internal definitions that they depend on),
the rule is that xen_native.h (and thus the toolstack library headers)
must be included *before* any of the headers in standard-headers/xen.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 accel/xen/xen-all.c                           |  1 +
 hw/9pfs/xen-9p-backend.c                      |  1 +
 hw/block/dataplane/xen-block.c                |  3 ++-
 hw/block/xen-block.c                          |  1 -
 hw/i386/xen/xen-hvm.c                         |  6 ++---
 hw/i386/xen/xen-mapcache.c                    |  2 +-
 hw/i386/xen/xen_platform.c                    |  7 +++---
 hw/xen/trace-events                           |  2 +-
 hw/xen/xen-operations.c                       |  2 +-
 hw/xen/xen_pt.c                               |  2 +-
 hw/xen/xen_pt.h                               |  2 +-
 hw/xen/xen_pt_config_init.c                   |  2 +-
 hw/xen/xen_pt_msi.c                           |  4 ++--
 include/hw/xen/xen.h                          | 22 ++++++++++++-------
 include/hw/xen/{xen_common.h => xen_native.h} | 10 ++++++---
 include/hw/xen/xen_pvdev.h                    |  3 ++-
 16 files changed, 42 insertions(+), 28 deletions(-)
 rename include/hw/xen/{xen_common.h => xen_native.h} (98%)

diff --git a/accel/xen/xen-all.c b/accel/xen/xen-all.c
index 514bc9eea4..fcdc8364bf 100644
--- a/accel/xen/xen-all.c
+++ b/accel/xen/xen-all.c
@@ -12,6 +12,7 @@
 #include "qemu/error-report.h"
 #include "qemu/module.h"
 #include "qapi/error.h"
+#include "hw/xen/xen_native.h"
 #include "hw/xen/xen-legacy-backend.h"
 #include "hw/xen/xen_pt.h"
 #include "chardev/char.h"
diff --git a/hw/9pfs/xen-9p-backend.c b/hw/9pfs/xen-9p-backend.c
index d8bb0e847c..74f3a05f88 100644
--- a/hw/9pfs/xen-9p-backend.c
+++ b/hw/9pfs/xen-9p-backend.c
@@ -22,6 +22,7 @@
 #include "qemu/config-file.h"
 #include "qemu/main-loop.h"
 #include "qemu/option.h"
+#include "qemu/iov.h"
 #include "fsdev/qemu-fsdev.h"
 
 #define VERSIONS "1"
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index 8322a1de82..d44e39e2a1 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -23,8 +23,9 @@
 #include "qemu/main-loop.h"
 #include "qemu/memalign.h"
 #include "qapi/error.h"
-#include "hw/xen/xen_common.h"
 #include "hw/block/xen_blkif.h"
+#include "hw/xen/xen.h"
+#include "hw/xen/interface/io/ring.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/iothread.h"
 #include "xen-block.h"
diff --git a/hw/block/xen-block.c b/hw/block/xen-block.c
index 345b284d70..87299615e3 100644
--- a/hw/block/xen-block.c
+++ b/hw/block/xen-block.c
@@ -19,7 +19,6 @@
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
 #include "qom/object_interfaces.h"
-#include "hw/xen/xen_common.h"
 #include "hw/block/xen_blkif.h"
 #include "hw/qdev-properties.h"
 #include "hw/xen/xen-block.h"
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 38146efb5a..779d923e10 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -18,8 +18,8 @@
 #include "hw/irq.h"
 #include "hw/hw.h"
 #include "hw/i386/apic-msidef.h"
+#include "hw/xen/xen_native.h"
 #include "hw/xen/xen_backend_ops.h"
-#include "hw/xen/xen_common.h"
 #include "hw/xen/xen-legacy-backend.h"
 #include "hw/xen/xen-bus.h"
 #include "hw/xen/xen-x86.h"
@@ -54,8 +54,8 @@ static bool xen_in_migration;
 /* Compatibility with older version */
 
 /* This allows QEMU to build on a system that has Xen 4.5 or earlier
- * installed.  This here (not in hw/xen/xen_common.h) because xen/hvm/ioreq.h
- * needs to be included before this block and hw/xen/xen_common.h needs to
+ * installed.  This here (not in hw/xen/xen_native.h) because xen/hvm/ioreq.h
+ * needs to be included before this block and hw/xen/xen_native.h needs to
  * be included before xen/hvm/ioreq.h
  */
 #ifndef IOREQ_TYPE_VMWARE_PORT
diff --git a/hw/i386/xen/xen-mapcache.c b/hw/i386/xen/xen-mapcache.c
index a2f93096e7..30cea6ee42 100644
--- a/hw/i386/xen/xen-mapcache.c
+++ b/hw/i386/xen/xen-mapcache.c
@@ -14,7 +14,7 @@
 
 #include <sys/resource.h>
 
-#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen_native.h"
 #include "qemu/bitmap.h"
 
 #include "sysemu/runstate.h"
diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c
index 00f0527b30..a5fb1c96d0 100644
--- a/hw/i386/xen/xen_platform.c
+++ b/hw/i386/xen/xen_platform.c
@@ -29,7 +29,6 @@
 #include "hw/ide/pci.h"
 #include "hw/pci/pci.h"
 #include "migration/vmstate.h"
-#include "hw/xen/xen.h"
 #include "net/net.h"
 #include "trace.h"
 #include "sysemu/xen.h"
@@ -39,10 +38,12 @@
 #include "qom/object.h"
 
 #ifdef CONFIG_XEN
-#include "hw/xen/xen_common.h"
-#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen_native.h"
 #endif
 
+/* The rule is that xen_native.h must come first */
+#include "hw/xen/xen.h"
+
 //#define DEBUG_PLATFORM
 
 #ifdef DEBUG_PLATFORM
diff --git a/hw/xen/trace-events b/hw/xen/trace-events
index 3da3fd8348..55c9e1df68 100644
--- a/hw/xen/trace-events
+++ b/hw/xen/trace-events
@@ -1,6 +1,6 @@
 # See docs/devel/tracing.rst for syntax documentation.
 
-# ../../include/hw/xen/xen_common.h
+# ../../include/hw/xen/xen_native.h
 xen_default_ioreq_server(void) ""
 xen_ioreq_server_create(uint32_t id) "id: %u"
 xen_ioreq_server_destroy(uint32_t id) "id: %u"
diff --git a/hw/xen/xen-operations.c b/hw/xen/xen-operations.c
index b2b76c63af..352153c974 100644
--- a/hw/xen/xen-operations.c
+++ b/hw/xen/xen-operations.c
@@ -13,8 +13,8 @@
 #include "qemu/uuid.h"
 #include "qapi/error.h"
 
+#include "hw/xen/xen_native.h"
 #include "hw/xen/xen_backend_ops.h"
-#include "hw/xen/xen_common.h"
 
 /*
  * If we have new enough libxenctrl then we do not want/need these compat
diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c
index 0ec7e52183..d0b68925b5 100644
--- a/hw/xen/xen_pt.c
+++ b/hw/xen/xen_pt.c
@@ -59,9 +59,9 @@
 #include "hw/pci/pci.h"
 #include "hw/qdev-properties.h"
 #include "hw/qdev-properties-system.h"
+#include "xen_pt.h"
 #include "hw/xen/xen.h"
 #include "hw/xen/xen-legacy-backend.h"
-#include "xen_pt.h"
 #include "qemu/range.h"
 
 static bool has_igd_gfx_passthru;
diff --git a/hw/xen/xen_pt.h b/hw/xen/xen_pt.h
index cf10fc7bbf..27fc7bb585 100644
--- a/hw/xen/xen_pt.h
+++ b/hw/xen/xen_pt.h
@@ -1,7 +1,7 @@
 #ifndef XEN_PT_H
 #define XEN_PT_H
 
-#include "hw/xen/xen_common.h"
+#include "hw/xen/xen_native.h"
 #include "xen-host-pci-device.h"
 #include "qom/object.h"
 
diff --git a/hw/xen/xen_pt_config_init.c b/hw/xen/xen_pt_config_init.c
index cde898b744..0cd9fbd22c 100644
--- a/hw/xen/xen_pt_config_init.c
+++ b/hw/xen/xen_pt_config_init.c
@@ -15,8 +15,8 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qemu/timer.h"
-#include "hw/xen/xen-legacy-backend.h"
 #include "xen_pt.h"
+#include "hw/xen/xen-legacy-backend.h"
 
 #define XEN_PT_MERGE_VALUE(value, data, val_mask) \
     (((value) & (val_mask)) | ((data) & ~(val_mask)))
diff --git a/hw/xen/xen_pt_msi.c b/hw/xen/xen_pt_msi.c
index b71563f98a..09cca4eecb 100644
--- a/hw/xen/xen_pt_msi.c
+++ b/hw/xen/xen_pt_msi.c
@@ -11,9 +11,9 @@
 
 #include "qemu/osdep.h"
 
-#include "hw/xen/xen-legacy-backend.h"
-#include "xen_pt.h"
 #include "hw/i386/apic-msidef.h"
+#include "xen_pt.h"
+#include "hw/xen/xen-legacy-backend.h"
 
 
 #define XEN_PT_AUTO_ASSIGN -1
diff --git a/include/hw/xen/xen.h b/include/hw/xen/xen.h
index fcc6c5b522..416c833a29 100644
--- a/include/hw/xen/xen.h
+++ b/include/hw/xen/xen.h
@@ -8,15 +8,21 @@
 #define QEMU_HW_XEN_H
 
 /*
- * As a temporary measure while the headers are being untangled, define
- * __XEN_TOOLS__ here before any Xen headers are included. Otherwise, if
- * the Xen toolstack library headers are later included, they will find
- * some of the "internal" definitions missing and the build will fail. In
- * later commits, we'll end up with a rule that the native libraries have
- * to be included first, which will ensure that the libraries get the
- * version of Xen libraries that they expect.
+ * C files using Xen toolstack libraries will have included those headers
+ * already via xen_native.h, and hving __XEM_TOOLS__ defined will have
+ * automatically set __XEN_INTERFACE_VERSION__ to the latest supported
+ * by the *system* Xen headers which were transitively included.
+ *
+ * C files which are part of the internal emulation, and which did not
+ * include xen_native.h may need this defined so that the Xen headers
+ * imported to standard-headers/xen will expose the appropriate API
+ * version.
+ *
+ * This is why there's a rule that xen_native.h must be included first.
  */
-#define __XEN_TOOLS__ 1
+#ifndef __XEN_INTERFACE_VERSION__
+#define __XEN_INTERFACE_VERSION__ 0x00040e00
+#endif
 
 #include "exec/cpu-common.h"
 
diff --git a/include/hw/xen/xen_common.h b/include/hw/xen/xen_native.h
similarity index 98%
rename from include/hw/xen/xen_common.h
rename to include/hw/xen/xen_native.h
index 7edcf3eb25..6bcc83baf9 100644
--- a/include/hw/xen/xen_common.h
+++ b/include/hw/xen/xen_native.h
@@ -1,5 +1,9 @@
-#ifndef QEMU_HW_XEN_COMMON_H
-#define QEMU_HW_XEN_COMMON_H
+#ifndef QEMU_HW_XEN_NATIVE_H
+#define QEMU_HW_XEN_NATIVE_H
+
+#ifdef __XEN_INTERFACE_VERSION__
+#error In Xen native files, include xen_native.h before other Xen headers
+#endif
 
 /*
  * If we have new enough libxenctrl then we do not want/need these compat
@@ -622,4 +626,4 @@ static inline int xen_set_ioreq_server_state(domid_t dom,
 
 #endif
 
-#endif /* QEMU_HW_XEN_COMMON_H */
+#endif /* QEMU_HW_XEN_NATIVE_H */
diff --git a/include/hw/xen/xen_pvdev.h b/include/hw/xen/xen_pvdev.h
index d8eea353b8..ddad4b9f36 100644
--- a/include/hw/xen/xen_pvdev.h
+++ b/include/hw/xen/xen_pvdev.h
@@ -1,8 +1,9 @@
 #ifndef QEMU_HW_XEN_PVDEV_H
 #define QEMU_HW_XEN_PVDEV_H
 
+#include "hw/qdev-core.h"
 #include "hw/xen/xen_backend_ops.h"
-#include "hw/xen/xen_common.h"
+
 /* ------------------------------------------------------------- */
 
 #define XEN_BUFSIZE 1024
-- 
2.39.0
[RFC PATCH v7bis 10/19] hw/xen: Build PV backend drivers for XENFV_MACHINE
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

Now that we have the redirectable Xen backend operations we can build the
PV backends even without the Xen libraries.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/9pfs/meson.build            | 2 +-
 hw/block/dataplane/meson.build | 2 +-
 hw/block/meson.build           | 2 +-
 hw/char/meson.build            | 2 +-
 hw/display/meson.build         | 2 +-
 hw/usb/meson.build             | 2 +-
 hw/xen/meson.build             | 5 ++++-
 7 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/hw/9pfs/meson.build b/hw/9pfs/meson.build
index 12443b6ad5..ac5f37f229 100644
--- a/hw/9pfs/meson.build
+++ b/hw/9pfs/meson.build
@@ -15,7 +15,7 @@ fs_ss.add(files(
 ))
 fs_ss.add(when: 'CONFIG_LINUX', if_true: files('9p-util-linux.c'))
 fs_ss.add(when: 'CONFIG_DARWIN', if_true: files('9p-util-darwin.c'))
-fs_ss.add(when: 'CONFIG_XEN', if_true: files('xen-9p-backend.c'))
+fs_ss.add(when: 'CONFIG_XENFV_MACHINE', if_true: files('xen-9p-backend.c'))
 softmmu_ss.add_all(when: 'CONFIG_FSDEV_9P', if_true: fs_ss)
 
 specific_ss.add(when: 'CONFIG_VIRTIO_9P', if_true: files('virtio-9p-device.c'))
diff --git a/hw/block/dataplane/meson.build b/hw/block/dataplane/meson.build
index 12c6a264f1..ca7b62fdc6 100644
--- a/hw/block/dataplane/meson.build
+++ b/hw/block/dataplane/meson.build
@@ -1,2 +1,2 @@
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c'))
-specific_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+specific_ss.add(when: 'CONFIG_XENFV_MACHINE', if_true: files('xen-block.c'))
diff --git a/hw/block/meson.build b/hw/block/meson.build
index b434d5654c..c51539970b 100644
--- a/hw/block/meson.build
+++ b/hw/block/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PFLASH_CFI02', if_true: files('pflash_cfi02.c'))
 softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80.c'))
 softmmu_ss.add(when: 'CONFIG_SSI_M25P80', if_true: files('m25p80_sfdp.c'))
 softmmu_ss.add(when: 'CONFIG_SWIM', if_true: files('swim.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen-block.c'))
+softmmu_ss.add(when: 'CONFIG_XENFV_MACHINE', if_true: files('xen-block.c'))
 softmmu_ss.add(when: 'CONFIG_TC58128', if_true: files('tc58128.c'))
 
 specific_ss.add(when: 'CONFIG_VIRTIO_BLK', if_true: files('virtio-blk.c', 'virtio-blk-common.c'))
diff --git a/hw/char/meson.build b/hw/char/meson.build
index 7b594f51b8..d6ef21bfe1 100644
--- a/hw/char/meson.build
+++ b/hw/char/meson.build
@@ -18,7 +18,7 @@ softmmu_ss.add(when: 'CONFIG_SERIAL_PCI', if_true: files('serial-pci.c'))
 softmmu_ss.add(when: 'CONFIG_SERIAL_PCI_MULTI', if_true: files('serial-pci-multi.c'))
 softmmu_ss.add(when: 'CONFIG_SHAKTI_UART', if_true: files('shakti_uart.c'))
 softmmu_ss.add(when: 'CONFIG_VIRTIO_SERIAL', if_true: files('virtio-console.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xen_console.c'))
+softmmu_ss.add(when: 'CONFIG_XENFV_MACHINE', if_true: files('xen_console.c'))
 softmmu_ss.add(when: 'CONFIG_XILINX', if_true: files('xilinx_uartlite.c'))
 
 softmmu_ss.add(when: 'CONFIG_AVR_USART', if_true: files('avr_usart.c'))
diff --git a/hw/display/meson.build b/hw/display/meson.build
index f860c2c562..76852ab2de 100644
--- a/hw/display/meson.build
+++ b/hw/display/meson.build
@@ -14,7 +14,7 @@ softmmu_ss.add(when: 'CONFIG_PL110', if_true: files('pl110.c'))
 softmmu_ss.add(when: 'CONFIG_SII9022', if_true: files('sii9022.c'))
 softmmu_ss.add(when: 'CONFIG_SSD0303', if_true: files('ssd0303.c'))
 softmmu_ss.add(when: 'CONFIG_SSD0323', if_true: files('ssd0323.c'))
-softmmu_ss.add(when: 'CONFIG_XEN', if_true: files('xenfb.c'))
+softmmu_ss.add(when: 'CONFIG_XENFV_MACHINE', if_true: files('xenfb.c'))
 
 softmmu_ss.add(when: 'CONFIG_VGA_PCI', if_true: files('vga-pci.c'))
 softmmu_ss.add(when: 'CONFIG_VGA_ISA', if_true: files('vga-isa.c'))
diff --git a/hw/usb/meson.build b/hw/usb/meson.build
index 793df42e21..440df35028 100644
--- a/hw/usb/meson.build
+++ b/hw/usb/meson.build
@@ -84,6 +84,6 @@ if libusb.found()
   hw_usb_modules += {'host': usbhost_ss}
 endif
 
-softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XEN', libusb], if_true: files('xen-usb.c'))
+softmmu_ss.add(when: ['CONFIG_USB', 'CONFIG_XENFV_MACHINE', libusb], if_true: files('xen-usb.c'))
 
 modules += { 'hw-usb': hw_usb_modules }
diff --git a/hw/xen/meson.build b/hw/xen/meson.build
index f195bbd25c..0d1eddbca6 100644
--- a/hw/xen/meson.build
+++ b/hw/xen/meson.build
@@ -1,10 +1,13 @@
-softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
+softmmu_ss.add(when: ['CONFIG_XENFV_MACHINE'], if_true: files(
   'xen-backend.c',
   'xen-bus-helper.c',
   'xen-bus.c',
   'xen-legacy-backend.c',
   'xen_devconfig.c',
   'xen_pvdev.c',
+))
+
+softmmu_ss.add(when: ['CONFIG_XEN', xen], if_true: files(
   'xen-operations.c',
 ))
 
-- 
2.39.0
[RFC PATCH v7bis 11/19] hw/xen: Map guest XENSTORE_PFN grant in emulated Xenstore
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/xen_xenstore.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/hw/i386/kvm/xen_xenstore.c b/hw/i386/kvm/xen_xenstore.c
index 6369e29f59..6e712816d3 100644
--- a/hw/i386/kvm/xen_xenstore.c
+++ b/hw/i386/kvm/xen_xenstore.c
@@ -21,6 +21,7 @@
 
 #include "hw/sysbus.h"
 #include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
 #include "xen_overlay.h"
 #include "xen_evtchn.h"
 #include "xen_xenstore.h"
@@ -30,13 +31,11 @@
 
 #include "standard-headers/xen/io/xs_wire.h"
 #include "standard-headers/xen/event_channel.h"
+#include "standard-headers/xen/grant_table.h"
 
 #define TYPE_XEN_XENSTORE "xen-xenstore"
 OBJECT_DECLARE_SIMPLE_TYPE(XenXenstoreState, XEN_XENSTORE)
 
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
 
@@ -59,6 +58,8 @@ struct XenXenstoreState {
     evtchn_port_t guest_port;
     evtchn_port_t be_port;
     struct xenevtchn_handle *eh;
+    struct xengntdev_handle *gt;
+    void *granted_xs;
 };
 
 struct XenXenstoreState *xen_xenstore_singleton;
@@ -461,5 +462,11 @@ int xen_xenstore_reset(void)
     }
     s->be_port = err;
 
+    s->gt = qemu_xen_gnttab_open();
+    uint32_t xs_gntref = GNTTAB_RESERVED_XENSTORE;
+    s->granted_xs = qemu_xen_gnttab_map_refs(s->gt, 1, xen_domid,
+                                             &xs_gntref, PROT_READ|PROT_WRITE);
+    printf("Granted XS %p\n", s->granted_xs);
+
     return 0;
 }
-- 
2.39.0
[RFC PATCH v7bis 12/19] hw/xen: Add backend implementation of grant table operations
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

This is limited to mapping a single grant at a time, because under Xen the
pages are mapped *contiguously* into qemu's address space, and that's very
hard to do when those pages actually come from anonymous mappings in qemu
in the first place.

Eventually perhaps we can look at using shared mappings of actual objects
for system RAM, and then we can make new mappings of the same backing
store (be it deleted files, shmem, whatever). But for now let's stuck to
a page at a time.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/xen_gnttab.c | 294 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 291 insertions(+), 3 deletions(-)

diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 5e8fc0184e..8008a391bd 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -21,6 +21,7 @@
 
 #include "hw/sysbus.h"
 #include "hw/xen/xen.h"
+#include "hw/xen/xen_backend_ops.h"
 #include "xen_overlay.h"
 #include "xen_gnttab.h"
 
@@ -33,12 +34,11 @@
 #define TYPE_XEN_GNTTAB "xen-gnttab"
 OBJECT_DECLARE_SIMPLE_TYPE(XenGnttabState, XEN_GNTTAB)
 
-#define XEN_PAGE_SHIFT 12
-#define XEN_PAGE_SIZE (1ULL << XEN_PAGE_SHIFT)
-
 #define ENTRIES_PER_FRAME_V1 (XEN_PAGE_SIZE / sizeof(grant_entry_v1_t))
 #define ENTRIES_PER_FRAME_V2 (XEN_PAGE_SIZE / sizeof(grant_entry_v2_t))
 
+static struct gnttab_backend_ops emu_gnttab_backend_ops;
+
 struct XenGnttabState {
     /*< private >*/
     SysBusDevice busdev;
@@ -57,6 +57,8 @@ struct XenGnttabState {
     MemoryRegion gnt_frames;
     MemoryRegion *gnt_aliases;
     uint64_t *gnt_frame_gpas;
+
+    uint8_t *map_track;
 };
 
 struct XenGnttabState *xen_gnttab_singleton;
@@ -88,9 +90,15 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
         s->gnt_frame_gpas[i] = INVALID_GPA;
     }
 
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
     qemu_mutex_init(&s->gnt_lock);
 
     xen_gnttab_singleton = s;
+
+    s->map_track = g_new0(uint8_t, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+    xen_gnttab_ops = &emu_gnttab_backend_ops;
 }
 
 static int xen_gnttab_post_load(void *opaque, int version_id)
@@ -236,3 +244,283 @@ int xen_gnttab_query_size_op(struct gnttab_query_size *size)
     size->max_nr_frames = s->max_frames;
     return 0;
 }
+
+/* Track per-open refs, to allow close() to clean up. */
+struct active_ref {
+    MemoryRegionSection mrs;
+    void *virtaddr;
+    uint32_t refcnt;
+    int prot;
+};
+
+static void gnt_unref(XenGnttabState *s, grant_ref_t ref,
+                      MemoryRegionSection *mrs, int prot)
+{
+    if (mrs && mrs->mr) {
+       if (prot & PROT_WRITE) {
+            memory_region_set_dirty(mrs->mr, mrs->offset_within_region,
+                                    XEN_PAGE_SIZE);
+        }
+        memory_region_unref(mrs->mr);
+        mrs->mr = NULL;
+    }
+    assert(s->map_track[ref] != 0);
+
+    if (--s->map_track[ref] == 0) {
+        grant_entry_v1_t *gnt_p = &s->entries.v1[ref];
+        qatomic_and(&gnt_p->flags, ~(GTF_reading | GTF_writing));
+    }
+}
+
+static uint64_t gnt_ref(XenGnttabState *s, grant_ref_t ref, int prot)
+{
+    uint16_t mask = GTF_type_mask | GTF_sub_page;
+    volatile grant_entry_v1_t *gnt_p;
+    grant_entry_v1_t gnt;
+    int retries = 0;
+
+    if (ref >= s->max_frames * ENTRIES_PER_FRAME_V1 ||
+        s->map_track[ref] == UINT8_MAX) {
+        return INVALID_GPA;
+    }
+
+    if (prot & PROT_WRITE) {
+        mask |= GTF_readonly;
+    }
+
+    gnt_p = &s->entries.v1[ref];
+
+    /*
+     * The guest can legitimately be changing the GTF_readonly flag. Allow
+     * that, but don't let a malicious guest cause a livelock.
+     */
+    for (retries = 0; retries < 5; retries++) {
+        uint16_t new_flags;
+        gnt = *gnt_p;
+
+        if ((gnt.flags & mask) != GTF_permit_access ||
+            gnt.domid != DOMID_QEMU) {
+            return INVALID_GPA;
+        }
+
+        new_flags = gnt.flags | GTF_reading;
+        if (prot & PROT_WRITE) {
+            new_flags |= GTF_writing;
+        }
+
+        if (qatomic_cmpxchg(&gnt_p->flags, gnt.flags, new_flags) == gnt.flags) {
+            return (uint64_t)gnt.frame << XEN_PAGE_SHIFT;
+        }
+    }
+
+    return INVALID_GPA;
+}
+
+struct xengntdev_handle {
+    GHashTable *active_maps;
+};
+
+static int xen_be_gnttab_set_max_grants(struct xengntdev_handle *xgt,
+                                        uint32_t nr_grants)
+{
+    return 0;
+}
+
+static void *xen_be_gnttab_map_refs(struct xengntdev_handle *xgt, uint32_t count,
+                                    uint32_t domid, uint32_t *refs, int prot)
+{
+    XenGnttabState *s = xen_gnttab_singleton;
+    struct active_ref *act;
+
+    if (!s) {
+        errno = ENOTSUP;
+        return NULL;
+    }
+
+    if (domid != xen_domid) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    if (!count || count > 4096) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    /*
+     * Making a contiguous mapping from potentially discontiguous grant
+     * references would be... distinctly non-trivial. We don't support it.
+     * Even changing the API to return an array of pointers, one per page,
+     * wouldn't be simple to use in PV backends because some structures
+     * actually cross page boundaries (e.g. 32-bit blkif_response ring
+     * entries are 12 bytes).
+     */
+    if (count != 1) {
+        errno = EINVAL;
+        return NULL;
+    }
+
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
+    if (act) {
+        if ((prot & PROT_WRITE) && !(act->prot & PROT_WRITE)) {
+            if (gnt_ref(s, refs[0], prot) == INVALID_GPA) {
+                return NULL;
+            }
+            act->prot |= PROT_WRITE;
+        }
+        act->refcnt++;
+    } else {
+        uint64_t gpa = gnt_ref(s, refs[0], prot);
+        if (gpa == INVALID_GPA) {
+            errno = EINVAL;
+            return NULL;
+        }
+
+        act = g_new0(struct active_ref, 1);
+        act->prot = prot;
+        act->refcnt = 1;
+        act->mrs = memory_region_find(get_system_memory(), gpa, XEN_PAGE_SIZE);
+
+        if (!act->mrs.mr || act->mrs.size < XEN_PAGE_SIZE ||
+            memory_region_get_ram_addr(act->mrs.mr) == RAM_ADDR_INVALID ||
+            !(act->virtaddr = qemu_map_ram_ptr(act->mrs.mr->ram_block,
+                                               act->mrs.offset_within_region))) {
+            gnt_unref(s, refs[0], &act->mrs, 0);
+            g_free(act);
+            errno = EINVAL;
+            return NULL;
+        }
+
+        s->map_track[refs[0]]++;
+        g_hash_table_insert(xgt->active_maps, GINT_TO_POINTER(refs[0]), act);
+    }
+
+    return act->virtaddr;
+}
+
+static gboolean do_unmap(gpointer key, gpointer value, gpointer user_data)
+{
+    XenGnttabState *s = user_data;
+    grant_ref_t gref = GPOINTER_TO_INT(key);
+    struct active_ref *act = value;
+
+    gnt_unref(s, gref, &act->mrs, act->prot);
+    g_free(act);
+    return true;
+}
+
+static int xen_be_gnttab_unmap(struct xengntdev_handle *xgt,
+                               void *start_address, uint32_t *refs,
+                               uint32_t count)
+{
+    XenGnttabState *s = xen_gnttab_singleton;
+    struct active_ref *act;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    if (count != 1) {
+        return -EINVAL;
+    }
+
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    act = g_hash_table_lookup(xgt->active_maps, GINT_TO_POINTER(refs[0]));
+    if (!act) {
+        return -ENOENT;
+    }
+
+    if (act->virtaddr != start_address) {
+        return -EINVAL;
+    }
+
+    if (!--act->refcnt) {
+        do_unmap(GINT_TO_POINTER(refs[0]), act, s);
+        g_hash_table_remove(xgt->active_maps, GINT_TO_POINTER(refs[0]));
+    }
+
+    return 0;
+}
+
+/*
+ * This looks a bit like the one for true Xen in xen-operations.c but
+ * in emulation we don't support multi-page mappings. And under Xen we
+ * *want* the multi-page mappings so we have fewer bounces through the
+ * kernel and the hypervisor. So the code paths end up being similar,
+ * but different.
+ */
+static int xen_be_gnttab_copy(struct xengntdev_handle *xgt, bool to_domain,
+                              uint32_t domid, XenGrantCopySegment *segs,
+                              uint32_t nr_segs, Error **errp)
+{
+    int prot = to_domain ? PROT_WRITE : PROT_READ;
+    unsigned int i;
+
+    for (i = 0; i < nr_segs; i++) {
+        XenGrantCopySegment *seg = &segs[i];
+        void *page;
+        uint32_t ref = to_domain ? seg->dest.foreign.ref :
+            seg->source.foreign.ref;
+
+        page = xen_be_gnttab_map_refs(xgt, 1, domid, &ref, prot);
+        if (!page) {
+            if (errp) {
+                error_setg_errno(errp, errno,
+                                 "xen_be_gnttab_map_refs failed");
+            }
+            return -errno;
+        }
+
+        if (to_domain) {
+            memcpy(page + seg->dest.foreign.offset, seg->source.virt,
+                   seg->len);
+        } else {
+            memcpy(seg->dest.virt, page + seg->source.foreign.offset,
+                   seg->len);
+        }
+
+        if (xen_be_gnttab_unmap(xgt, page, &ref, 1)) {
+            if (errp) {
+                error_setg_errno(errp, errno, "xen_be_gnttab_unmap failed");
+            }
+            return -errno;
+        }
+    }
+
+    return 0;
+}
+
+static struct xengntdev_handle *xen_be_gnttab_open(void)
+{
+    struct xengntdev_handle *xgt = g_new0(struct xengntdev_handle, 1);
+
+    xgt->active_maps = g_hash_table_new(g_direct_hash, g_direct_equal);
+    return xgt;
+}
+
+static int xen_be_gnttab_close(struct xengntdev_handle *xgt)
+{
+    XenGnttabState *s = xen_gnttab_singleton;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    g_hash_table_foreach_remove(xgt->active_maps, do_unmap, s);
+    g_hash_table_destroy(xgt->active_maps);
+    g_free(xgt);
+    return 0;
+}
+
+static struct gnttab_backend_ops emu_gnttab_backend_ops = {
+    .open = xen_be_gnttab_open,
+    .close = xen_be_gnttab_close,
+    .grant_copy = xen_be_gnttab_copy,
+    .set_max_grants = xen_be_gnttab_set_max_grants,
+    .map_refs = xen_be_gnttab_map_refs,
+    .unmap = xen_be_gnttab_unmap,
+};
+
-- 
2.39.0
[RFC PATCH v7bis 13/19] hw/xen: Implement soft reset for emulated gnttab
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

This is only part of it; we will also need to get the PV back end drivers
to tear down their own mappings (or do it for them, but they kind of need
to stop using the pointers too).

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/xen_gnttab.c  | 26 ++++++++++++++++++++++++--
 hw/i386/kvm/xen_gnttab.h  |  1 +
 target/i386/kvm/xen-emu.c |  5 +++++
 3 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/hw/i386/kvm/xen_gnttab.c b/hw/i386/kvm/xen_gnttab.c
index 8008a391bd..3fe3a8043b 100644
--- a/hw/i386/kvm/xen_gnttab.c
+++ b/hw/i386/kvm/xen_gnttab.c
@@ -72,13 +72,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
         error_setg(errp, "Xen grant table support is for Xen emulation");
         return;
     }
-    s->nr_frames = 0;
     s->max_frames = kvm_xen_get_gnttab_max_frames();
     memory_region_init_ram(&s->gnt_frames, OBJECT(dev), "xen:grant_table",
                            XEN_PAGE_SIZE * s->max_frames, &error_abort);
     memory_region_set_enabled(&s->gnt_frames, true);
     s->entries.v1 = memory_region_get_ram_ptr(&s->gnt_frames);
-    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
 
     /* Create individual page-sizes aliases for overlays */
     s->gnt_aliases = (void *)g_new0(MemoryRegion, s->max_frames);
@@ -90,8 +88,11 @@ static void xen_gnttab_realize(DeviceState *dev, Error **errp)
         s->gnt_frame_gpas[i] = INVALID_GPA;
     }
 
+    s->nr_frames = 0;
+    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
     s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
     s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
     qemu_mutex_init(&s->gnt_lock);
 
     xen_gnttab_singleton = s;
@@ -524,3 +525,24 @@ static struct gnttab_backend_ops emu_gnttab_backend_ops = {
     .unmap = xen_be_gnttab_unmap,
 };
 
+int xen_gnttab_reset(void)
+{
+    XenGnttabState *s = xen_gnttab_singleton;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    QEMU_LOCK_GUARD(&s->gnt_lock);
+
+    s->nr_frames = 0;
+
+    memset(s->entries.v1, 0, XEN_PAGE_SIZE * s->max_frames);
+
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].flags = GTF_permit_access;
+    s->entries.v1[GNTTAB_RESERVED_XENSTORE].frame = XEN_SPECIAL_PFN(XENSTORE);
+
+    memset(s->map_track, 0, s->max_frames * ENTRIES_PER_FRAME_V1);
+
+    return 0;
+}
diff --git a/hw/i386/kvm/xen_gnttab.h b/hw/i386/kvm/xen_gnttab.h
index 3bdbe96191..ee215239b0 100644
--- a/hw/i386/kvm/xen_gnttab.h
+++ b/hw/i386/kvm/xen_gnttab.h
@@ -13,6 +13,7 @@
 #define QEMU_XEN_GNTTAB_H
 
 void xen_gnttab_create(void);
+int xen_gnttab_reset(void);
 int xen_gnttab_map_page(uint64_t idx, uint64_t gfn);
 
 struct gnttab_set_version;
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index 0cd7113681..e2e5e96b67 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -1341,6 +1341,11 @@ static int kvm_xen_soft_reset(void)
         return err;
     }
 
+    err = xen_gnttab_reset();
+    if (err) {
+        return err;
+    }
+
     err = xen_xenstore_reset();
     if (err) {
         return err;
-- 
2.39.0
[RFC PATCH v7bis 14/19] hw/xen: Remove old version of Xen headers
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

For Xen emulation support, a full set of Xen headers was imported to
include/standard-headers/xen. This renders the original partial set in
include/hw/xen/interface redundant.

Now that the header ordering and handling of __XEN_INTERFACE_VERSION__
vs. __XEN_TOOLS__ is all untangled, remove the old set of headers and
use only the new ones.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/9pfs/xen-9pfs.h                      |    4 +-
 hw/block/dataplane/xen-block.c          |    2 +-
 hw/block/xen_blkif.h                    |    5 +-
 hw/char/xen_console.c                   |    2 +-
 hw/display/xenfb.c                      |    6 +-
 hw/net/xen_nic.c                        |    2 +-
 hw/usb/xen-usb.c                        |    2 +-
 include/hw/xen/interface/grant_table.h  |   36 -
 include/hw/xen/interface/io/blkif.h     |  712 ----------------
 include/hw/xen/interface/io/console.h   |   46 --
 include/hw/xen/interface/io/fbif.h      |  156 ----
 include/hw/xen/interface/io/kbdif.h     |  566 -------------
 include/hw/xen/interface/io/netif.h     | 1010 -----------------------
 include/hw/xen/interface/io/protocols.h |   42 -
 include/hw/xen/interface/io/ring.h      |  474 -----------
 include/hw/xen/interface/io/usbif.h     |  254 ------
 include/hw/xen/interface/io/xenbus.h    |   70 --
 17 files changed, 12 insertions(+), 3377 deletions(-)
 delete mode 100644 include/hw/xen/interface/grant_table.h
 delete mode 100644 include/hw/xen/interface/io/blkif.h
 delete mode 100644 include/hw/xen/interface/io/console.h
 delete mode 100644 include/hw/xen/interface/io/fbif.h
 delete mode 100644 include/hw/xen/interface/io/kbdif.h
 delete mode 100644 include/hw/xen/interface/io/netif.h
 delete mode 100644 include/hw/xen/interface/io/protocols.h
 delete mode 100644 include/hw/xen/interface/io/ring.h
 delete mode 100644 include/hw/xen/interface/io/usbif.h
 delete mode 100644 include/hw/xen/interface/io/xenbus.h

diff --git a/hw/9pfs/xen-9pfs.h b/hw/9pfs/xen-9pfs.h
index 241e2216a4..2eb369b262 100644
--- a/hw/9pfs/xen-9pfs.h
+++ b/hw/9pfs/xen-9pfs.h
@@ -13,8 +13,8 @@
 #ifndef HW_9PFS_XEN_9PFS_H
 #define HW_9PFS_XEN_9PFS_H
 
-#include "hw/xen/interface/io/protocols.h"
-#include "hw/xen/interface/io/ring.h"
+#include "standard-headers/xen/io/protocols.h"
+#include "standard-headers/xen/io/ring.h"
 
 /*
  * Do not merge into xen-9p-backend.c: clang doesn't allow unused static
diff --git a/hw/block/dataplane/xen-block.c b/hw/block/dataplane/xen-block.c
index d44e39e2a1..8fa1b3ab83 100644
--- a/hw/block/dataplane/xen-block.c
+++ b/hw/block/dataplane/xen-block.c
@@ -25,7 +25,7 @@
 #include "qapi/error.h"
 #include "hw/block/xen_blkif.h"
 #include "hw/xen/xen.h"
-#include "hw/xen/interface/io/ring.h"
+#include "standard-headers/xen/io/ring.h"
 #include "sysemu/block-backend.h"
 #include "sysemu/iothread.h"
 #include "xen-block.h"
diff --git a/hw/block/xen_blkif.h b/hw/block/xen_blkif.h
index 99733529c1..465e4097b7 100644
--- a/hw/block/xen_blkif.h
+++ b/hw/block/xen_blkif.h
@@ -1,8 +1,9 @@
 #ifndef XEN_BLKIF_H
 #define XEN_BLKIF_H
 
-#include "hw/xen/interface/io/blkif.h"
-#include "hw/xen/interface/io/protocols.h"
+#include "hw/xen/xen.h"
+#include "standard-headers/xen/io/blkif.h"
+#include "standard-headers/xen/io/protocols.h"
 
 /*
  * Not a real protocol.  Used to generate ring structs which contain
diff --git a/hw/char/xen_console.c b/hw/char/xen_console.c
index c7a19c0e7c..15265099a3 100644
--- a/hw/char/xen_console.c
+++ b/hw/char/xen_console.c
@@ -28,7 +28,7 @@
 #include "chardev/char-fe.h"
 #include "hw/xen/xen-legacy-backend.h"
 
-#include "hw/xen/interface/io/console.h"
+#include "standard-headers/xen/io/console.h"
 
 struct buffer {
     uint8_t *data;
diff --git a/hw/display/xenfb.c b/hw/display/xenfb.c
index 0074a9b6f8..62b3db14c5 100644
--- a/hw/display/xenfb.c
+++ b/hw/display/xenfb.c
@@ -31,9 +31,9 @@
 #include "ui/console.h"
 #include "hw/xen/xen-legacy-backend.h"
 
-#include "hw/xen/interface/io/fbif.h"
-#include "hw/xen/interface/io/kbdif.h"
-#include "hw/xen/interface/io/protocols.h"
+#include "standard-headers/xen/io/fbif.h"
+#include "standard-headers/xen/io/kbdif.h"
+#include "standard-headers/xen/io/protocols.h"
 
 #include "trace.h"
 
diff --git a/hw/net/xen_nic.c b/hw/net/xen_nic.c
index 9bbf6599fc..9df09dc3a4 100644
--- a/hw/net/xen_nic.c
+++ b/hw/net/xen_nic.c
@@ -29,7 +29,7 @@
 #include "net/util.h"
 #include "hw/xen/xen-legacy-backend.h"
 
-#include "hw/xen/interface/io/netif.h"
+#include "standard-headers/xen/io/netif.h"
 
 /* ------------------------------------------------------------- */
 
diff --git a/hw/usb/xen-usb.c b/hw/usb/xen-usb.c
index 66cb3f7c24..5889ea0a3c 100644
--- a/hw/usb/xen-usb.c
+++ b/hw/usb/xen-usb.c
@@ -33,7 +33,7 @@
 #include "qapi/qmp/qdict.h"
 #include "qapi/qmp/qstring.h"
 
-#include "hw/xen/interface/io/usbif.h"
+#include "standard-headers/xen/io/usbif.h"
 
 /*
  * Check for required support of usbif.h: USBIF_SHORT_NOT_OK was the last
diff --git a/include/hw/xen/interface/grant_table.h b/include/hw/xen/interface/grant_table.h
deleted file mode 100644
index 2af0cbdde3..0000000000
--- a/include/hw/xen/interface/grant_table.h
+++ /dev/null
@@ -1,36 +0,0 @@
-/******************************************************************************
- * grant_table.h
- *
- * Interface for granting foreign access to page frames, and receiving
- * page-ownership transfers.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright (c) 2004, K A Fraser
- */
-
-#ifndef __XEN_PUBLIC_GRANT_TABLE_H__
-#define __XEN_PUBLIC_GRANT_TABLE_H__
-
-/*
- * Reference to a grant entry in a specified domain's grant table.
- */
-typedef uint32_t grant_ref_t;
-
-#endif /* __XEN_PUBLIC_GRANT_TABLE_H__ */
diff --git a/include/hw/xen/interface/io/blkif.h b/include/hw/xen/interface/io/blkif.h
deleted file mode 100644
index d07fa1e078..0000000000
--- a/include/hw/xen/interface/io/blkif.h
+++ /dev/null
@@ -1,712 +0,0 @@
-/******************************************************************************
- * blkif.h
- *
- * Unified block-device I/O interface for Xen guest OSes.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright (c) 2003-2004, Keir Fraser
- * Copyright (c) 2012, Spectra Logic Corporation
- */
-
-#ifndef __XEN_PUBLIC_IO_BLKIF_H__
-#define __XEN_PUBLIC_IO_BLKIF_H__
-
-#include "ring.h"
-#include "../grant_table.h"
-
-/*
- * Front->back notifications: When enqueuing a new request, sending a
- * notification can be made conditional on req_event (i.e., the generic
- * hold-off mechanism provided by the ring macros). Backends must set
- * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
- *
- * Back->front notifications: When enqueuing a new response, sending a
- * notification can be made conditional on rsp_event (i.e., the generic
- * hold-off mechanism provided by the ring macros). Frontends must set
- * rsp_event appropriately (e.g., using RING_FINAL_CHECK_FOR_RESPONSES()).
- */
-
-#ifndef blkif_vdev_t
-#define blkif_vdev_t   uint16_t
-#endif
-#define blkif_sector_t uint64_t
-
-/*
- * Feature and Parameter Negotiation
- * =================================
- * The two halves of a Xen block driver utilize nodes within the XenStore to
- * communicate capabilities and to negotiate operating parameters.  This
- * section enumerates these nodes which reside in the respective front and
- * backend portions of the XenStore, following the XenBus convention.
- *
- * All data in the XenStore is stored as strings.  Nodes specifying numeric
- * values are encoded in decimal.  Integer value ranges listed below are
- * expressed as fixed sized integer types capable of storing the conversion
- * of a properly formated node string, without loss of information.
- *
- * Any specified default value is in effect if the corresponding XenBus node
- * is not present in the XenStore.
- *
- * XenStore nodes in sections marked "PRIVATE" are solely for use by the
- * driver side whose XenBus tree contains them.
- *
- * XenStore nodes marked "DEPRECATED" in their notes section should only be
- * used to provide interoperability with legacy implementations.
- *
- * See the XenBus state transition diagram below for details on when XenBus
- * nodes must be published and when they can be queried.
- *
- *****************************************************************************
- *                            Backend XenBus Nodes
- *****************************************************************************
- *
- *------------------ Backend Device Identification (PRIVATE) ------------------
- *
- * mode
- *      Values:         "r" (read only), "w" (writable)
- *
- *      The read or write access permissions to the backing store to be
- *      granted to the frontend.
- *
- * params
- *      Values:         string
- *
- *      A free formatted string providing sufficient information for the
- *      hotplug script to attach the device and provide a suitable
- *      handler (ie: a block device) for blkback to use.
- *
- * physical-device
- *      Values:         "MAJOR:MINOR"
- *      Notes: 11
- *
- *      MAJOR and MINOR are the major number and minor number of the
- *      backing device respectively.
- *
- * physical-device-path
- *      Values:         path string
- *
- *      A string that contains the absolute path to the disk image. On
- *      NetBSD and Linux this is always a block device, while on FreeBSD
- *      it can be either a block device or a regular file.
- *
- * type
- *      Values:         "file", "phy", "tap"
- *
- *      The type of the backing device/object.
- *
- *
- * direct-io-safe
- *      Values:         0/1 (boolean)
- *      Default Value:  0
- *
- *      The underlying storage is not affected by the direct IO memory
- *      lifetime bug.  See:
- *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
- *
- *      Therefore this option gives the backend permission to use
- *      O_DIRECT, notwithstanding that bug.
- *
- *      That is, if this option is enabled, use of O_DIRECT is safe,
- *      in circumstances where we would normally have avoided it as a
- *      workaround for that bug.  This option is not relevant for all
- *      backends, and even not necessarily supported for those for
- *      which it is relevant.  A backend which knows that it is not
- *      affected by the bug can ignore this option.
- *
- *      This option doesn't require a backend to use O_DIRECT, so it
- *      should not be used to try to control the caching behaviour.
- *
- *--------------------------------- Features ---------------------------------
- *
- * feature-barrier
- *      Values:         0/1 (boolean)
- *      Default Value:  0
- *
- *      A value of "1" indicates that the backend can process requests
- *      containing the BLKIF_OP_WRITE_BARRIER request opcode.  Requests
- *      of this type may still be returned at any time with the
- *      BLKIF_RSP_EOPNOTSUPP result code.
- *
- * feature-flush-cache
- *      Values:         0/1 (boolean)
- *      Default Value:  0
- *
- *      A value of "1" indicates that the backend can process requests
- *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.  Requests
- *      of this type may still be returned at any time with the
- *      BLKIF_RSP_EOPNOTSUPP result code.
- *
- * feature-discard
- *      Values:         0/1 (boolean)
- *      Default Value:  0
- *
- *      A value of "1" indicates that the backend can process requests
- *      containing the BLKIF_OP_DISCARD request opcode.  Requests
- *      of this type may still be returned at any time with the
- *      BLKIF_RSP_EOPNOTSUPP result code.
- *
- * feature-persistent
- *      Values:         0/1 (boolean)
- *      Default Value:  0
- *      Notes: 7
- *
- *      A value of "1" indicates that the backend can keep the grants used
- *      by the frontend driver mapped, so the same set of grants should be
- *      used in all transactions. The maximum number of grants the backend
- *      can map persistently depends on the implementation, but ideally it
- *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
- *      feature the backend doesn't need to unmap each grant, preventing
- *      costly TLB flushes. The backend driver should only map grants
- *      persistently if the frontend supports it. If a backend driver chooses
- *      to use the persistent protocol when the frontend doesn't support it,
- *      it will probably hit the maximum number of persistently mapped grants
- *      (due to the fact that the frontend won't be reusing the same grants),
- *      and fall back to non-persistent mode. Backend implementations may
- *      shrink or expand the number of persistently mapped grants without
- *      notifying the frontend depending on memory constraints (this might
- *      cause a performance degradation).
- *
- *      If a backend driver wants to limit the maximum number of persistently
- *      mapped grants to a value less than RING_SIZE *
- *      BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to
- *      discard the grants that are less commonly used. Using a LRU in the
- *      backend driver paired with a LIFO queue in the frontend will
- *      allow us to have better performance in this scenario.
- *
- *----------------------- Request Transport Parameters ------------------------
- *
- * max-ring-page-order
- *      Values:         <uint32_t>
- *      Default Value:  0
- *      Notes:          1, 3
- *
- *      The maximum supported size of the request ring buffer in units of
- *      lb(machine pages). (e.g. 0 == 1 page,  1 = 2 pages, 2 == 4 pages,
- *      etc.).
- *
- * max-ring-pages
- *      Values:         <uint32_t>
- *      Default Value:  1
- *      Notes:          DEPRECATED, 2, 3
- *
- *      The maximum supported size of the request ring buffer in units of
- *      machine pages.  The value must be a power of 2.
- *
- *------------------------- Backend Device Properties -------------------------
- *
- * discard-enable
- *      Values:         0/1 (boolean)
- *      Default Value:  1
- *
- *      This optional property, set by the toolstack, instructs the backend
- *      to offer (or not to offer) discard to the frontend. If the property
- *      is missing the backend should offer discard if the backing storage
- *      actually supports it.
- *
- * discard-alignment
- *      Values:         <uint32_t>
- *      Default Value:  0
- *      Notes:          4, 5
- *
- *      The offset, in bytes from the beginning of the virtual block device,
- *      to the first, addressable, discard extent on the underlying device.
- *
- * discard-granularity
- *      Values:         <uint32_t>
- *      Default Value:  <"sector-size">
- *      Notes:          4
- *
- *      The size, in bytes, of the individually addressable discard extents
- *      of the underlying device.
- *
- * discard-secure
- *      Values:         0/1 (boolean)
- *      Default Value:  0
- *      Notes:          10
- *
- *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
- *      requests with the BLKIF_DISCARD_SECURE flag set.
- *
- * info
- *      Values:         <uint32_t> (bitmap)
- *
- *      A collection of bit flags describing attributes of the backing
- *      device.  The VDISK_* macros define the meaning of each bit
- *      location.
- *
- * sector-size
- *      Values:         <uint32_t>
- *
- *      The logical block size, in bytes, of the underlying storage. This
- *      must be a power of two with a minimum value of 512.
- *
- *      NOTE: Because of implementation bugs in some frontends this must be
- *            set to 512, unless the frontend advertizes a non-zero value
- *            in its "feature-large-sector-size" xenbus node. (See below).
- *
- * physical-sector-size
- *      Values:         <uint32_t>
- *      Default Value:  <"sector-size">
- *
- *      The physical block size, in bytes, of the backend storage. This
- *      must be an integer multiple of "sector-size".
- *
- * sectors
- *      Values:         <uint64_t>
- *
- *      The size of the backend device, expressed in units of "sector-size".
- *      The product of "sector-size" and "sectors" must also be an integer
- *      multiple of "physical-sector-size", if that node is present.
- *
- *****************************************************************************
- *                            Frontend XenBus Nodes
- *****************************************************************************
- *
- *----------------------- Request Transport Parameters -----------------------
- *
- * event-channel
- *      Values:         <uint32_t>
- *
- *      The identifier of the Xen event channel used to signal activity
- *      in the ring buffer.
- *
- * ring-ref
- *      Values:         <uint32_t>
- *      Notes:          6
- *
- *      The Xen grant reference granting permission for the backend to map
- *      the sole page in a single page sized ring buffer.
- *
- * ring-ref%u
- *      Values:         <uint32_t>
- *      Notes:          6
- *
- *      For a frontend providing a multi-page ring, a "number of ring pages"
- *      sized list of nodes, each containing a Xen grant reference granting
- *      permission for the backend to map the page of the ring located
- *      at page index "%u".  Page indexes are zero based.
- *
- * protocol
- *      Values:         string (XEN_IO_PROTO_ABI_*)
- *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
- *
- *      The machine ABI rules governing the format of all ring request and
- *      response structures.
- *
- * ring-page-order
- *      Values:         <uint32_t>
- *      Default Value:  0
- *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
- *      Notes:          1, 3
- *
- *      The size of the frontend allocated request ring buffer in units
- *      of lb(machine pages). (e.g. 0 == 1 page, 1 = 2 pages, 2 == 4 pages,
- *      etc.).
- *
- * num-ring-pages
- *      Values:         <uint32_t>
- *      Default Value:  1
- *      Maximum Value:  MAX(max-ring-pages,(0x1 << max-ring-page-order))
- *      Notes:          DEPRECATED, 2, 3
- *
- *      The size of the frontend allocated request ring buffer in units of
- *      machine pages.  The value must be a power of 2.
- *
- *--------------------------------- Features ---------------------------------
- *
- * feature-persistent
- *      Values:         0/1 (boolean)
- *      Default Value:  0
- *      Notes: 7, 8, 9
- *
- *      A value of "1" indicates that the frontend will reuse the same grants
- *      for all transactions, allowing the backend to map them with write
- *      access (even when it should be read-only). If the frontend hits the
- *      maximum number of allowed persistently mapped grants, it can fallback
- *      to non persistent mode. This will cause a performance degradation,
- *      since the backend driver will still try to map those grants
- *      persistently. Since the persistent grants protocol is compatible with
- *      the previous protocol, a frontend driver can choose to work in
- *      persistent mode even when the backend doesn't support it.
- *
- *      It is recommended that the frontend driver stores the persistently
- *      mapped grants in a LIFO queue, so a subset of all persistently mapped
- *      grants gets used commonly. This is done in case the backend driver
- *      decides to limit the maximum number of persistently mapped grants
- *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
- *
- * feature-large-sector-size
- *      Values:         0/1 (boolean)
- *      Default Value:  0
- *
- *      A value of "1" indicates that the frontend will correctly supply and
- *      interpret all sector-based quantities in terms of the "sector-size"
- *      value supplied in the backend info, whatever that may be set to.
- *      If this node is not present or its value is "0" then it is assumed
- *      that the frontend requires that the logical block size is 512 as it
- *      is hardcoded (which is the case in some frontend implementations).
- *
- *------------------------- Virtual Device Properties -------------------------
- *
- * device-type
- *      Values:         "disk", "cdrom", "floppy", etc.
- *
- * virtual-device
- *      Values:         <uint32_t>
- *
- *      A value indicating the physical device to virtualize within the
- *      frontend's domain.  (e.g. "The first ATA disk", "The third SCSI
- *      disk", etc.)
- *
- *      See docs/misc/vbd-interface.txt for details on the format of this
- *      value.
- *
- * Notes
- * -----
- * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
- *     PV drivers.
- * (2) Multi-page ring buffer scheme first used in some RedHat distributions
- *     including a distribution deployed on certain nodes of the Amazon
- *     EC2 cluster.
- * (3) Support for multi-page ring buffers was implemented independently,
- *     in slightly different forms, by both Citrix and RedHat/Amazon.
- *     For full interoperability, block front and backends should publish
- *     identical ring parameters, adjusted for unit differences, to the
- *     XenStore nodes used in both schemes.
- * (4) Devices that support discard functionality may internally allocate space
- *     (discardable extents) in units that are larger than the exported logical
- *     block size. If the backing device has such discardable extents the
- *     backend should provide both discard-granularity and discard-alignment.
- *     Providing just one of the two may be considered an error by the frontend.
- *     Backends supporting discard should include discard-granularity and
- *     discard-alignment even if it supports discarding individual sectors.
- *     Frontends should assume discard-alignment == 0 and discard-granularity
- *     == sector size if these keys are missing.
- * (5) The discard-alignment parameter allows a physical device to be
- *     partitioned into virtual devices that do not necessarily begin or
- *     end on a discardable extent boundary.
- * (6) When there is only a single page allocated to the request ring,
- *     'ring-ref' is used to communicate the grant reference for this
- *     page to the backend.  When using a multi-page ring, the 'ring-ref'
- *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are used.
- * (7) When using persistent grants data has to be copied from/to the page
- *     where the grant is currently mapped. The overhead of doing this copy
- *     however doesn't suppress the speed improvement of not having to unmap
- *     the grants.
- * (8) The frontend driver has to allow the backend driver to map all grants
- *     with write access, even when they should be mapped read-only, since
- *     further requests may reuse these grants and require write permissions.
- * (9) Linux implementation doesn't have a limit on the maximum number of
- *     grants that can be persistently mapped in the frontend driver, but
- *     due to the frontent driver implementation it should never be bigger
- *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
- *(10) The discard-secure property may be present and will be set to 1 if the
- *     backing device supports secure discard.
- *(11) Only used by Linux and NetBSD.
- */
-
-/*
- * Multiple hardware queues/rings:
- * If supported, the backend will write the key "multi-queue-max-queues" to
- * the directory for that vbd, and set its value to the maximum supported
- * number of queues.
- * Frontends that are aware of this feature and wish to use it can write the
- * key "multi-queue-num-queues" with the number they wish to use, which must be
- * greater than zero, and no more than the value reported by the backend in
- * "multi-queue-max-queues".
- *
- * For frontends requesting just one queue, the usual event-channel and
- * ring-ref keys are written as before, simplifying the backend processing
- * to avoid distinguishing between a frontend that doesn't understand the
- * multi-queue feature, and one that does, but requested only one queue.
- *
- * Frontends requesting two or more queues must not write the toplevel
- * event-channel and ring-ref keys, instead writing those keys under sub-keys
- * having the name "queue-N" where N is the integer ID of the queue/ring for
- * which those keys belong. Queues are indexed from zero.
- * For example, a frontend with two queues must write the following set of
- * queue-related keys:
- *
- * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
- * /local/domain/1/device/vbd/0/queue-0 = ""
- * /local/domain/1/device/vbd/0/queue-0/ring-ref = "<ring-ref#0>"
- * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
- * /local/domain/1/device/vbd/0/queue-1 = ""
- * /local/domain/1/device/vbd/0/queue-1/ring-ref = "<ring-ref#1>"
- * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
- *
- * It is also possible to use multiple queues/rings together with
- * feature multi-page ring buffer.
- * For example, a frontend requests two queues/rings and the size of each ring
- * buffer is two pages must write the following set of related keys:
- *
- * /local/domain/1/device/vbd/0/multi-queue-num-queues = "2"
- * /local/domain/1/device/vbd/0/ring-page-order = "1"
- * /local/domain/1/device/vbd/0/queue-0 = ""
- * /local/domain/1/device/vbd/0/queue-0/ring-ref0 = "<ring-ref#0>"
- * /local/domain/1/device/vbd/0/queue-0/ring-ref1 = "<ring-ref#1>"
- * /local/domain/1/device/vbd/0/queue-0/event-channel = "<evtchn#0>"
- * /local/domain/1/device/vbd/0/queue-1 = ""
- * /local/domain/1/device/vbd/0/queue-1/ring-ref0 = "<ring-ref#2>"
- * /local/domain/1/device/vbd/0/queue-1/ring-ref1 = "<ring-ref#3>"
- * /local/domain/1/device/vbd/0/queue-1/event-channel = "<evtchn#1>"
- *
- */
-
-/*
- * STATE DIAGRAMS
- *
- *****************************************************************************
- *                                   Startup                                 *
- *****************************************************************************
- *
- * Tool stack creates front and back nodes with state XenbusStateInitialising.
- *
- * Front                                Back
- * =================================    =====================================
- * XenbusStateInitialising              XenbusStateInitialising
- *  o Query virtual device               o Query backend device identification
- *    properties.                          data.
- *  o Setup OS device instance.          o Open and validate backend device.
- *                                       o Publish backend features and
- *                                         transport parameters.
- *                                                      |
- *                                                      |
- *                                                      V
- *                                      XenbusStateInitWait
- *
- * o Query backend features and
- *   transport parameters.
- * o Allocate and initialize the
- *   request ring.
- * o Publish transport parameters
- *   that will be in effect during
- *   this connection.
- *              |
- *              |
- *              V
- * XenbusStateInitialised
- *
- *                                       o Query frontend transport parameters.
- *                                       o Connect to the request ring and
- *                                         event channel.
- *                                       o Publish backend device properties.
- *                                                      |
- *                                                      |
- *                                                      V
- *                                      XenbusStateConnected
- *
- *  o Query backend device properties.
- *  o Finalize OS virtual device
- *    instance.
- *              |
- *              |
- *              V
- * XenbusStateConnected
- *
- * Note: Drivers that do not support any optional features, or the negotiation
- *       of transport parameters, can skip certain states in the state machine:
- *
- *       o A frontend may transition to XenbusStateInitialised without
- *         waiting for the backend to enter XenbusStateInitWait.  In this
- *         case, default transport parameters are in effect and any
- *         transport parameters published by the frontend must contain
- *         their default values.
- *
- *       o A backend may transition to XenbusStateInitialised, bypassing
- *         XenbusStateInitWait, without waiting for the frontend to first
- *         enter the XenbusStateInitialised state.  In this case, default
- *         transport parameters are in effect and any transport parameters
- *         published by the backend must contain their default values.
- *
- *       Drivers that support optional features and/or transport parameter
- *       negotiation must tolerate these additional state transition paths.
- *       In general this means performing the work of any skipped state
- *       transition, if it has not already been performed, in addition to the
- *       work associated with entry into the current state.
- */
-
-/*
- * REQUEST CODES.
- */
-#define BLKIF_OP_READ              0
-#define BLKIF_OP_WRITE             1
-/*
- * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
- * operation code ("barrier request") must be completed prior to the
- * execution of the barrier request.  All writes issued after the barrier
- * request must not execute until after the completion of the barrier request.
- *
- * Optional.  See "feature-barrier" XenBus node documentation above.
- */
-#define BLKIF_OP_WRITE_BARRIER     2
-/*
- * Commit any uncommitted contents of the backing device's volatile cache
- * to stable storage.
- *
- * Optional.  See "feature-flush-cache" XenBus node documentation above.
- */
-#define BLKIF_OP_FLUSH_DISKCACHE   3
-/*
- * Used in SLES sources for device specific command packet
- * contained within the request. Reserved for that purpose.
- */
-#define BLKIF_OP_RESERVED_1        4
-/*
- * Indicate to the backend device that a region of storage is no longer in
- * use, and may be discarded at any time without impact to the client.  If
- * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
- * discarded region on the device must be rendered unrecoverable before the
- * command returns.
- *
- * This operation is analogous to performing a trim (ATA) or unamp (SCSI),
- * command on a native device.
- *
- * More information about trim/unmap operations can be found at:
- * http://t13.org/Documents/UploadedDocuments/docs2008/
- *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
- * http://www.seagate.com/staticfiles/support/disc/manuals/
- *     Interface%20manuals/100293068c.pdf
- *
- * Optional.  See "feature-discard", "discard-alignment",
- * "discard-granularity", and "discard-secure" in the XenBus node
- * documentation above.
- */
-#define BLKIF_OP_DISCARD           5
-
-/*
- * Recognized if "feature-max-indirect-segments" in present in the backend
- * xenbus info. The "feature-max-indirect-segments" node contains the maximum
- * number of segments allowed by the backend per request. If the node is
- * present, the frontend might use blkif_request_indirect structs in order to
- * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
- * maximum number of indirect segments is fixed by the backend, but the
- * frontend can issue requests with any number of indirect segments as long as
- * it's less than the number provided by the backend. The indirect_grefs field
- * in blkif_request_indirect should be filled by the frontend with the
- * grant references of the pages that are holding the indirect segments.
- * These pages are filled with an array of blkif_request_segment that hold the
- * information about the segments. The number of indirect pages to use is
- * determined by the number of segments an indirect request contains. Every
- * indirect page can contain a maximum of
- * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
- * calculate the number of indirect pages to use we have to do
- * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
- *
- * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
- * create the "feature-max-indirect-segments" node!
- */
-#define BLKIF_OP_INDIRECT          6
-
-/*
- * Maximum scatter/gather segments per request.
- * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
- * NB. This could be 12 if the ring indexes weren't stored in the same page.
- */
-#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
-
-/*
- * Maximum number of indirect pages to use per request.
- */
-#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
-
-/*
- * NB. 'first_sect' and 'last_sect' in blkif_request_segment, as well as
- * 'sector_number' in blkif_request, blkif_request_discard and
- * blkif_request_indirect are sector-based quantities. See the description
- * of the "feature-large-sector-size" frontend xenbus node above for
- * more information.
- */
-struct blkif_request_segment {
-    grant_ref_t gref;        /* reference to I/O buffer frame        */
-    /* @first_sect: first sector in frame to transfer (inclusive).   */
-    /* @last_sect: last sector in frame to transfer (inclusive).     */
-    uint8_t     first_sect, last_sect;
-};
-
-/*
- * Starting ring element for any I/O request.
- */
-struct blkif_request {
-    uint8_t        operation;    /* BLKIF_OP_???                         */
-    uint8_t        nr_segments;  /* number of segments                   */
-    blkif_vdev_t   handle;       /* only for read/write requests         */
-    uint64_t       id;           /* private guest value, echoed in resp  */
-    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-    struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
-typedef struct blkif_request blkif_request_t;
-
-/*
- * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
- * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
- */
-struct blkif_request_discard {
-    uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
-    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
-#define BLKIF_DISCARD_SECURE (1<<0)  /* ignored if discard-secure=0      */
-    blkif_vdev_t   handle;       /* same as for read/write requests      */
-    uint64_t       id;           /* private guest value, echoed in resp  */
-    blkif_sector_t sector_number;/* start sector idx on disk             */
-    uint64_t       nr_sectors;   /* number of contiguous sectors to discard*/
-};
-typedef struct blkif_request_discard blkif_request_discard_t;
-
-struct blkif_request_indirect {
-    uint8_t        operation;    /* BLKIF_OP_INDIRECT                    */
-    uint8_t        indirect_op;  /* BLKIF_OP_{READ/WRITE}                */
-    uint16_t       nr_segments;  /* number of segments                   */
-    uint64_t       id;           /* private guest value, echoed in resp  */
-    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-    blkif_vdev_t   handle;       /* same as for read/write requests      */
-    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
-#ifdef __i386__
-    uint64_t       pad;          /* Make it 64 byte aligned on i386      */
-#endif
-};
-typedef struct blkif_request_indirect blkif_request_indirect_t;
-
-struct blkif_response {
-    uint64_t        id;              /* copied from request */
-    uint8_t         operation;       /* copied from request */
-    int16_t         status;          /* BLKIF_RSP_???       */
-};
-typedef struct blkif_response blkif_response_t;
-
-/*
- * STATUS RETURN CODES.
- */
- /* Operation not supported (only happens on barrier writes). */
-#define BLKIF_RSP_EOPNOTSUPP  -2
- /* Operation failed for some unspecified reason (-EIO). */
-#define BLKIF_RSP_ERROR       -1
- /* Operation completed successfully. */
-#define BLKIF_RSP_OKAY         0
-
-/*
- * Generate blkif ring structures and types.
- */
-DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
-
-#define VDISK_CDROM        0x1
-#define VDISK_REMOVABLE    0x2
-#define VDISK_READONLY     0x4
-
-#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
diff --git a/include/hw/xen/interface/io/console.h b/include/hw/xen/interface/io/console.h
deleted file mode 100644
index e2155d1cf5..0000000000
--- a/include/hw/xen/interface/io/console.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/******************************************************************************
- * console.h
- *
- * Console I/O interface for Xen guest OSes.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright (c) 2005, Keir Fraser
- */
-
-#ifndef __XEN_PUBLIC_IO_CONSOLE_H__
-#define __XEN_PUBLIC_IO_CONSOLE_H__
-
-typedef uint32_t XENCONS_RING_IDX;
-
-#define MASK_XENCONS_IDX(idx, ring) ((idx) & (sizeof(ring)-1))
-
-struct xencons_interface {
-    char in[1024];
-    char out[2048];
-    XENCONS_RING_IDX in_cons, in_prod;
-    XENCONS_RING_IDX out_cons, out_prod;
-};
-
-#ifdef XEN_WANT_FLEX_CONSOLE_RING
-#include "ring.h"
-DEFINE_XEN_FLEX_RING(xencons);
-#endif
-
-#endif /* __XEN_PUBLIC_IO_CONSOLE_H__ */
diff --git a/include/hw/xen/interface/io/fbif.h b/include/hw/xen/interface/io/fbif.h
deleted file mode 100644
index ea87ebec0a..0000000000
--- a/include/hw/xen/interface/io/fbif.h
+++ /dev/null
@@ -1,156 +0,0 @@
-/*
- * fbif.h -- Xen virtual frame buffer device
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
- * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
- */
-
-#ifndef __XEN_PUBLIC_IO_FBIF_H__
-#define __XEN_PUBLIC_IO_FBIF_H__
-
-/* Out events (frontend -> backend) */
-
-/*
- * Out events may be sent only when requested by backend, and receipt
- * of an unknown out event is an error.
- */
-
-/* Event type 1 currently not used */
-/*
- * Framebuffer update notification event
- * Capable frontend sets feature-update in xenstore.
- * Backend requests it by setting request-update in xenstore.
- */
-#define XENFB_TYPE_UPDATE 2
-
-struct xenfb_update
-{
-    uint8_t type;    /* XENFB_TYPE_UPDATE */
-    int32_t x;      /* source x */
-    int32_t y;      /* source y */
-    int32_t width;  /* rect width */
-    int32_t height; /* rect height */
-};
-
-/*
- * Framebuffer resize notification event
- * Capable backend sets feature-resize in xenstore.
- */
-#define XENFB_TYPE_RESIZE 3
-
-struct xenfb_resize
-{
-    uint8_t type;    /* XENFB_TYPE_RESIZE */
-    int32_t width;   /* width in pixels */
-    int32_t height;  /* height in pixels */
-    int32_t stride;  /* stride in bytes */
-    int32_t depth;   /* depth in bits */
-    int32_t offset;  /* offset of the framebuffer in bytes */
-};
-
-#define XENFB_OUT_EVENT_SIZE 40
-
-union xenfb_out_event
-{
-    uint8_t type;
-    struct xenfb_update update;
-    struct xenfb_resize resize;
-    char pad[XENFB_OUT_EVENT_SIZE];
-};
-
-/* In events (backend -> frontend) */
-
-/*
- * Frontends should ignore unknown in events.
- */
-
-/*
- * Framebuffer refresh period advice
- * Backend sends it to advise the frontend their preferred period of
- * refresh.  Frontends that keep the framebuffer constantly up-to-date
- * just ignore it.  Frontends that use the advice should immediately
- * refresh the framebuffer (and send an update notification event if
- * those have been requested), then use the update frequency to guide
- * their periodical refreshs.
- */
-#define XENFB_TYPE_REFRESH_PERIOD 1
-#define XENFB_NO_REFRESH 0
-
-struct xenfb_refresh_period
-{
-    uint8_t type;    /* XENFB_TYPE_UPDATE_PERIOD */
-    uint32_t period; /* period of refresh, in ms,
-                      * XENFB_NO_REFRESH if no refresh is needed */
-};
-
-#define XENFB_IN_EVENT_SIZE 40
-
-union xenfb_in_event
-{
-    uint8_t type;
-    struct xenfb_refresh_period refresh_period;
-    char pad[XENFB_IN_EVENT_SIZE];
-};
-
-/* shared page */
-
-#define XENFB_IN_RING_SIZE 1024
-#define XENFB_IN_RING_LEN (XENFB_IN_RING_SIZE / XENFB_IN_EVENT_SIZE)
-#define XENFB_IN_RING_OFFS 1024
-#define XENFB_IN_RING(page) \
-    ((union xenfb_in_event *)((char *)(page) + XENFB_IN_RING_OFFS))
-#define XENFB_IN_RING_REF(page, idx) \
-    (XENFB_IN_RING((page))[(idx) % XENFB_IN_RING_LEN])
-
-#define XENFB_OUT_RING_SIZE 2048
-#define XENFB_OUT_RING_LEN (XENFB_OUT_RING_SIZE / XENFB_OUT_EVENT_SIZE)
-#define XENFB_OUT_RING_OFFS (XENFB_IN_RING_OFFS + XENFB_IN_RING_SIZE)
-#define XENFB_OUT_RING(page) \
-    ((union xenfb_out_event *)((char *)(page) + XENFB_OUT_RING_OFFS))
-#define XENFB_OUT_RING_REF(page, idx) \
-    (XENFB_OUT_RING((page))[(idx) % XENFB_OUT_RING_LEN])
-
-struct xenfb_page
-{
-    uint32_t in_cons, in_prod;
-    uint32_t out_cons, out_prod;
-
-    int32_t width;          /* the width of the framebuffer (in pixels) */
-    int32_t height;         /* the height of the framebuffer (in pixels) */
-    uint32_t line_length;   /* the length of a row of pixels (in bytes) */
-    uint32_t mem_length;    /* the length of the framebuffer (in bytes) */
-    uint8_t depth;          /* the depth of a pixel (in bits) */
-
-    /*
-     * Framebuffer page directory
-     *
-     * Each directory page holds PAGE_SIZE / sizeof(*pd)
-     * framebuffer pages, and can thus map up to PAGE_SIZE *
-     * PAGE_SIZE / sizeof(*pd) bytes.  With PAGE_SIZE == 4096 and
-     * sizeof(unsigned long) == 4/8, that's 4 Megs 32 bit and 2 Megs
-     * 64 bit.  256 directories give enough room for a 512 Meg
-     * framebuffer with a max resolution of 12,800x10,240.  Should
-     * be enough for a while with room leftover for expansion.
-     */
-    unsigned long pd[256];
-};
-
-#endif
diff --git a/include/hw/xen/interface/io/kbdif.h b/include/hw/xen/interface/io/kbdif.h
deleted file mode 100644
index 1d68cd458e..0000000000
--- a/include/hw/xen/interface/io/kbdif.h
+++ /dev/null
@@ -1,566 +0,0 @@
-/*
- * kbdif.h -- Xen virtual keyboard/mouse
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright (C) 2005 Anthony Liguori <aliguori@us.ibm.com>
- * Copyright (C) 2006 Red Hat, Inc., Markus Armbruster <armbru@redhat.com>
- */
-
-#ifndef __XEN_PUBLIC_IO_KBDIF_H__
-#define __XEN_PUBLIC_IO_KBDIF_H__
-
-/*
- *****************************************************************************
- *                     Feature and Parameter Negotiation
- *****************************************************************************
- *
- * The two halves of a para-virtual driver utilize nodes within
- * XenStore to communicate capabilities and to negotiate operating parameters.
- * This section enumerates these nodes which reside in the respective front and
- * backend portions of XenStore, following XenBus convention.
- *
- * All data in XenStore is stored as strings.  Nodes specifying numeric
- * values are encoded in decimal. Integer value ranges listed below are
- * expressed as fixed sized integer types capable of storing the conversion
- * of a properly formated node string, without loss of information.
- *
- *****************************************************************************
- *                            Backend XenBus Nodes
- *****************************************************************************
- *
- *---------------------------- Features supported ----------------------------
- *
- * Capable backend advertises supported features by publishing
- * corresponding entries in XenStore and puts 1 as the value of the entry.
- * If a feature is not supported then 0 must be set or feature entry omitted.
- *
- * feature-disable-keyboard
- *      Values:         <uint>
- *
- *      If there is no need to expose a virtual keyboard device by the
- *      frontend then this must be set to 1.
- *
- * feature-disable-pointer
- *      Values:         <uint>
- *
- *      If there is no need to expose a virtual pointer device by the
- *      frontend then this must be set to 1.
- *
- * feature-abs-pointer
- *      Values:         <uint>
- *
- *      Backends, which support reporting of absolute coordinates for pointer
- *      device should set this to 1.
- *
- * feature-multi-touch
- *      Values:         <uint>
- *
- *      Backends, which support reporting of multi-touch events
- *      should set this to 1.
- *
- * feature-raw-pointer
- *      Values:        <uint>
- *
- *      Backends, which support reporting raw (unscaled) absolute coordinates
- *      for pointer devices should set this to 1. Raw (unscaled) values have
- *      a range of [0, 0x7fff].
- *
- *-----------------------  Device Instance Parameters ------------------------
- *
- * unique-id
- *      Values:         <string>
- *
- *      After device instance initialization it is assigned a unique ID,
- *      so every instance of the frontend can be identified by the backend
- *      by this ID. This can be UUID or such.
- *
- *------------------------- Pointer Device Parameters ------------------------
- *
- * width
- *      Values:         <uint>
- *
- *      Maximum X coordinate (width) to be used by the frontend
- *      while reporting input events, pixels, [0; UINT32_MAX].
- *
- * height
- *      Values:         <uint>
- *
- *      Maximum Y coordinate (height) to be used by the frontend
- *      while reporting input events, pixels, [0; UINT32_MAX].
- *
- *----------------------- Multi-touch Device Parameters ----------------------
- *
- * multi-touch-num-contacts
- *      Values:         <uint>
- *
- *      Number of simultaneous touches reported.
- *
- * multi-touch-width
- *      Values:         <uint>
- *
- *      Width of the touch area to be used by the frontend
- *      while reporting input events, pixels, [0; UINT32_MAX].
- *
- * multi-touch-height
- *      Values:         <uint>
- *
- *      Height of the touch area to be used by the frontend
- *      while reporting input events, pixels, [0; UINT32_MAX].
- *
- *****************************************************************************
- *                            Frontend XenBus Nodes
- *****************************************************************************
- *
- *------------------------------ Feature request -----------------------------
- *
- * Capable frontend requests features from backend via setting corresponding
- * entries to 1 in XenStore. Requests for features not advertised as supported
- * by the backend have no effect.
- *
- * request-abs-pointer
- *      Values:         <uint>
- *
- *      Request backend to report absolute pointer coordinates
- *      (XENKBD_TYPE_POS) instead of relative ones (XENKBD_TYPE_MOTION).
- *
- * request-multi-touch
- *      Values:         <uint>
- *
- *      Request backend to report multi-touch events.
- *
- * request-raw-pointer
- *      Values:         <uint>
- *
- *      Request backend to report raw unscaled absolute pointer coordinates.
- *      This option is only valid if request-abs-pointer is also set.
- *      Raw unscaled coordinates have the range [0, 0x7fff]
- *
- *----------------------- Request Transport Parameters -----------------------
- *
- * event-channel
- *      Values:         <uint>
- *
- *      The identifier of the Xen event channel used to signal activity
- *      in the ring buffer.
- *
- * page-gref
- *      Values:         <uint>
- *
- *      The Xen grant reference granting permission for the backend to map
- *      a sole page in a single page sized event ring buffer.
- *
- * page-ref
- *      Values:         <uint>
- *
- *      OBSOLETE, not recommended for use.
- *      PFN of the shared page.
- */
-
-/*
- * EVENT CODES.
- */
-
-#define XENKBD_TYPE_MOTION             1
-#define XENKBD_TYPE_RESERVED           2
-#define XENKBD_TYPE_KEY                3
-#define XENKBD_TYPE_POS                4
-#define XENKBD_TYPE_MTOUCH             5
-
-/* Multi-touch event sub-codes */
-
-#define XENKBD_MT_EV_DOWN              0
-#define XENKBD_MT_EV_UP                1
-#define XENKBD_MT_EV_MOTION            2
-#define XENKBD_MT_EV_SYN               3
-#define XENKBD_MT_EV_SHAPE             4
-#define XENKBD_MT_EV_ORIENT            5
-
-/*
- * CONSTANTS, XENSTORE FIELD AND PATH NAME STRINGS, HELPERS.
- */
-
-#define XENKBD_DRIVER_NAME             "vkbd"
-
-#define XENKBD_FIELD_FEAT_DSBL_KEYBRD  "feature-disable-keyboard"
-#define XENKBD_FIELD_FEAT_DSBL_POINTER "feature-disable-pointer"
-#define XENKBD_FIELD_FEAT_ABS_POINTER  "feature-abs-pointer"
-#define XENKBD_FIELD_FEAT_RAW_POINTER  "feature-raw-pointer"
-#define XENKBD_FIELD_FEAT_MTOUCH       "feature-multi-touch"
-#define XENKBD_FIELD_REQ_ABS_POINTER   "request-abs-pointer"
-#define XENKBD_FIELD_REQ_RAW_POINTER   "request-raw-pointer"
-#define XENKBD_FIELD_REQ_MTOUCH        "request-multi-touch"
-#define XENKBD_FIELD_RING_GREF         "page-gref"
-#define XENKBD_FIELD_EVT_CHANNEL       "event-channel"
-#define XENKBD_FIELD_WIDTH             "width"
-#define XENKBD_FIELD_HEIGHT            "height"
-#define XENKBD_FIELD_MT_WIDTH          "multi-touch-width"
-#define XENKBD_FIELD_MT_HEIGHT         "multi-touch-height"
-#define XENKBD_FIELD_MT_NUM_CONTACTS   "multi-touch-num-contacts"
-#define XENKBD_FIELD_UNIQUE_ID         "unique-id"
-
-/* OBSOLETE, not recommended for use */
-#define XENKBD_FIELD_RING_REF          "page-ref"
-
-/*
- *****************************************************************************
- * Description of the protocol between frontend and backend driver.
- *****************************************************************************
- *
- * The two halves of a Para-virtual driver communicate with
- * each other using a shared page and an event channel.
- * Shared page contains a ring with event structures.
- *
- * All reserved fields in the structures below must be 0.
- *
- *****************************************************************************
- *                           Backend to frontend events
- *****************************************************************************
- *
- * Frontends should ignore unknown in events.
- * All event packets have the same length (40 octets)
- * All event packets have common header:
- *
- *          0         octet
- * +-----------------+
- * |       type      |
- * +-----------------+
- * type - uint8_t, event code, XENKBD_TYPE_???
- *
- *
- * Pointer relative movement event
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_MOTION  |                     reserved                     | 4
- * +----------------+----------------+----------------+----------------+
- * |                               rel_x                               | 8
- * +----------------+----------------+----------------+----------------+
- * |                               rel_y                               | 12
- * +----------------+----------------+----------------+----------------+
- * |                               rel_z                               | 16
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 20
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * rel_x - int32_t, relative X motion
- * rel_y - int32_t, relative Y motion
- * rel_z - int32_t, relative Z motion (wheel)
- */
-
-struct xenkbd_motion
-{
-    uint8_t type;
-    int32_t rel_x;
-    int32_t rel_y;
-    int32_t rel_z;
-};
-
-/*
- * Key event (includes pointer buttons)
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_KEY     |     pressed    |            reserved             | 4
- * +----------------+----------------+----------------+----------------+
- * |                              keycode                              | 8
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 12
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * pressed - uint8_t, 1 if pressed; 0 otherwise
- * keycode - uint32_t, KEY_* from linux/input.h
- */
-
-struct xenkbd_key
-{
-    uint8_t type;
-    uint8_t pressed;
-    uint32_t keycode;
-};
-
-/*
- * Pointer absolute position event
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_POS     |                     reserved                     | 4
- * +----------------+----------------+----------------+----------------+
- * |                               abs_x                               | 8
- * +----------------+----------------+----------------+----------------+
- * |                               abs_y                               | 12
- * +----------------+----------------+----------------+----------------+
- * |                               rel_z                               | 16
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 20
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * abs_x - int32_t, absolute X position (in FB pixels)
- * abs_y - int32_t, absolute Y position (in FB pixels)
- * rel_z - int32_t, relative Z motion (wheel)
- */
-
-struct xenkbd_position
-{
-    uint8_t type;
-    int32_t abs_x;
-    int32_t abs_y;
-    int32_t rel_z;
-};
-
-/*
- * Multi-touch event and its sub-types
- *
- * All multi-touch event packets have common header:
- *
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_MTOUCH  |   event_type   |   contact_id   |    reserved    | 4
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 8
- * +----------------+----------------+----------------+----------------+
- *
- * event_type - unt8_t, multi-touch event sub-type, XENKBD_MT_EV_???
- * contact_id - unt8_t, ID of the contact
- *
- * Touch interactions can consist of one or more contacts.
- * For each contact, a series of events is generated, starting
- * with a down event, followed by zero or more motion events,
- * and ending with an up event. Events relating to the same
- * contact point can be identified by the ID of the sequence: contact ID.
- * Contact ID may be reused after XENKBD_MT_EV_UP event and
- * is in the [0; XENKBD_FIELD_NUM_CONTACTS - 1] range.
- *
- * For further information please refer to documentation on Wayland [1],
- * Linux [2] and Windows [3] multi-touch support.
- *
- * [1] https://cgit.freedesktop.org/wayland/wayland/tree/protocol/wayland.xml
- * [2] https://www.kernel.org/doc/Documentation/input/multi-touch-protocol.txt
- * [3] https://msdn.microsoft.com/en-us/library/jj151564(v=vs.85).aspx
- *
- *
- * Multi-touch down event - sent when a new touch is made: touch is assigned
- * a unique contact ID, sent with this and consequent events related
- * to this touch.
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_MTOUCH  |   _MT_EV_DOWN  |   contact_id   |    reserved    | 4
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 8
- * +----------------+----------------+----------------+----------------+
- * |                               abs_x                               | 12
- * +----------------+----------------+----------------+----------------+
- * |                               abs_y                               | 16
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 20
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * abs_x - int32_t, absolute X position, in pixels
- * abs_y - int32_t, absolute Y position, in pixels
- *
- * Multi-touch contact release event
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_MTOUCH  |  _MT_EV_UP     |   contact_id   |    reserved    | 4
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 8
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * Multi-touch motion event
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_MTOUCH  |  _MT_EV_MOTION |   contact_id   |    reserved    | 4
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 8
- * +----------------+----------------+----------------+----------------+
- * |                               abs_x                               | 12
- * +----------------+----------------+----------------+----------------+
- * |                               abs_y                               | 16
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 20
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * abs_x - int32_t, absolute X position, in pixels,
- * abs_y - int32_t, absolute Y position, in pixels,
- *
- * Multi-touch input synchronization event - shows end of a set of events
- * which logically belong together.
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_MTOUCH  |  _MT_EV_SYN    |   contact_id   |    reserved    | 4
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 8
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * Multi-touch shape event - touch point's shape has changed its shape.
- * Shape is approximated by an ellipse through the major and minor axis
- * lengths: major is the longer diameter of the ellipse and minor is the
- * shorter one. Center of the ellipse is reported via
- * XENKBD_MT_EV_DOWN/XENKBD_MT_EV_MOTION events.
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_MTOUCH  |  _MT_EV_SHAPE  |   contact_id   |    reserved    | 4
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 8
- * +----------------+----------------+----------------+----------------+
- * |                               major                               | 12
- * +----------------+----------------+----------------+----------------+
- * |                               minor                               | 16
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 20
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * major - unt32_t, length of the major axis, pixels
- * minor - unt32_t, length of the minor axis, pixels
- *
- * Multi-touch orientation event - touch point's shape has changed
- * its orientation: calculated as a clockwise angle between the major axis
- * of the ellipse and positive Y axis in degrees, [-180; +180].
- *         0                1                 2               3        octet
- * +----------------+----------------+----------------+----------------+
- * |  _TYPE_MTOUCH  |  _MT_EV_ORIENT |   contact_id   |    reserved    | 4
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 8
- * +----------------+----------------+----------------+----------------+
- * |           orientation           |            reserved             | 12
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 16
- * +----------------+----------------+----------------+----------------+
- * |/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/\/|
- * +----------------+----------------+----------------+----------------+
- * |                             reserved                              | 40
- * +----------------+----------------+----------------+----------------+
- *
- * orientation - int16_t, clockwise angle of the major axis
- */
-
-struct xenkbd_mtouch {
-    uint8_t type;            /* XENKBD_TYPE_MTOUCH */
-    uint8_t event_type;      /* XENKBD_MT_EV_??? */
-    uint8_t contact_id;
-    uint8_t reserved[5];     /* reserved for the future use */
-    union {
-        struct {
-            int32_t abs_x;   /* absolute X position, pixels */
-            int32_t abs_y;   /* absolute Y position, pixels */
-        } pos;
-        struct {
-            uint32_t major;  /* length of the major axis, pixels */
-            uint32_t minor;  /* length of the minor axis, pixels */
-        } shape;
-        int16_t orientation; /* clockwise angle of the major axis */
-    } u;
-};
-
-#define XENKBD_IN_EVENT_SIZE 40
-
-union xenkbd_in_event
-{
-    uint8_t type;
-    struct xenkbd_motion motion;
-    struct xenkbd_key key;
-    struct xenkbd_position pos;
-    struct xenkbd_mtouch mtouch;
-    char pad[XENKBD_IN_EVENT_SIZE];
-};
-
-/*
- *****************************************************************************
- *                            Frontend to backend events
- *****************************************************************************
- *
- * Out events may be sent only when requested by backend, and receipt
- * of an unknown out event is an error.
- * No out events currently defined.
-
- * All event packets have the same length (40 octets)
- * All event packets have common header:
- *          0         octet
- * +-----------------+
- * |       type      |
- * +-----------------+
- * type - uint8_t, event code
- */
-
-#define XENKBD_OUT_EVENT_SIZE 40
-
-union xenkbd_out_event
-{
-    uint8_t type;
-    char pad[XENKBD_OUT_EVENT_SIZE];
-};
-
-/*
- *****************************************************************************
- *                            Shared page
- *****************************************************************************
- */
-
-#define XENKBD_IN_RING_SIZE 2048
-#define XENKBD_IN_RING_LEN (XENKBD_IN_RING_SIZE / XENKBD_IN_EVENT_SIZE)
-#define XENKBD_IN_RING_OFFS 1024
-#define XENKBD_IN_RING(page) \
-    ((union xenkbd_in_event *)((char *)(page) + XENKBD_IN_RING_OFFS))
-#define XENKBD_IN_RING_REF(page, idx) \
-    (XENKBD_IN_RING((page))[(idx) % XENKBD_IN_RING_LEN])
-
-#define XENKBD_OUT_RING_SIZE 1024
-#define XENKBD_OUT_RING_LEN (XENKBD_OUT_RING_SIZE / XENKBD_OUT_EVENT_SIZE)
-#define XENKBD_OUT_RING_OFFS (XENKBD_IN_RING_OFFS + XENKBD_IN_RING_SIZE)
-#define XENKBD_OUT_RING(page) \
-    ((union xenkbd_out_event *)((char *)(page) + XENKBD_OUT_RING_OFFS))
-#define XENKBD_OUT_RING_REF(page, idx) \
-    (XENKBD_OUT_RING((page))[(idx) % XENKBD_OUT_RING_LEN])
-
-struct xenkbd_page
-{
-    uint32_t in_cons, in_prod;
-    uint32_t out_cons, out_prod;
-};
-
-#endif /* __XEN_PUBLIC_IO_KBDIF_H__ */
diff --git a/include/hw/xen/interface/io/netif.h b/include/hw/xen/interface/io/netif.h
deleted file mode 100644
index 48fa530950..0000000000
--- a/include/hw/xen/interface/io/netif.h
+++ /dev/null
@@ -1,1010 +0,0 @@
-/******************************************************************************
- * netif.h
- *
- * Unified network-device I/O interface for Xen guest OSes.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright (c) 2003-2004, Keir Fraser
- */
-
-#ifndef __XEN_PUBLIC_IO_NETIF_H__
-#define __XEN_PUBLIC_IO_NETIF_H__
-
-#include "ring.h"
-#include "../grant_table.h"
-
-/*
- * Older implementation of Xen network frontend / backend has an
- * implicit dependency on the MAX_SKB_FRAGS as the maximum number of
- * ring slots a skb can use. Netfront / netback may not work as
- * expected when frontend and backend have different MAX_SKB_FRAGS.
- *
- * A better approach is to add mechanism for netfront / netback to
- * negotiate this value. However we cannot fix all possible
- * frontends, so we need to define a value which states the minimum
- * slots backend must support.
- *
- * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS
- * (18), which is proved to work with most frontends. Any new backend
- * which doesn't negotiate with frontend should expect frontend to
- * send a valid packet using slots up to this value.
- */
-#define XEN_NETIF_NR_SLOTS_MIN 18
-
-/*
- * Notifications after enqueuing any type of message should be conditional on
- * the appropriate req_event or rsp_event field in the shared ring.
- * If the client sends notification for rx requests then it should specify
- * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
- * that it cannot safely queue packets (as it may not be kicked to send them).
- */
-
-/*
- * "feature-split-event-channels" is introduced to separate guest TX
- * and RX notification. Backend either doesn't support this feature or
- * advertises it via xenstore as 0 (disabled) or 1 (enabled).
- *
- * To make use of this feature, frontend should allocate two event
- * channels for TX and RX, advertise them to backend as
- * "event-channel-tx" and "event-channel-rx" respectively. If frontend
- * doesn't want to use this feature, it just writes "event-channel"
- * node as before.
- */
-
-/*
- * Multiple transmit and receive queues:
- * If supported, the backend will write the key "multi-queue-max-queues" to
- * the directory for that vif, and set its value to the maximum supported
- * number of queues.
- * Frontends that are aware of this feature and wish to use it can write the
- * key "multi-queue-num-queues", set to the number they wish to use, which
- * must be greater than zero, and no more than the value reported by the backend
- * in "multi-queue-max-queues".
- *
- * Queues replicate the shared rings and event channels.
- * "feature-split-event-channels" may optionally be used when using
- * multiple queues, but is not mandatory.
- *
- * Each queue consists of one shared ring pair, i.e. there must be the same
- * number of tx and rx rings.
- *
- * For frontends requesting just one queue, the usual event-channel and
- * ring-ref keys are written as before, simplifying the backend processing
- * to avoid distinguishing between a frontend that doesn't understand the
- * multi-queue feature, and one that does, but requested only one queue.
- *
- * Frontends requesting two or more queues must not write the toplevel
- * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
- * instead writing those keys under sub-keys having the name "queue-N" where
- * N is the integer ID of the queue for which those keys belong. Queues
- * are indexed from zero. For example, a frontend with two queues and split
- * event channels must write the following set of queue-related keys:
- *
- * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
- * /local/domain/1/device/vif/0/queue-0 = ""
- * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
- * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
- * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
- * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
- * /local/domain/1/device/vif/0/queue-1 = ""
- * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
- * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1"
- * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
- * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
- *
- * If there is any inconsistency in the XenStore data, the backend may
- * choose not to connect any queues, instead treating the request as an
- * error. This includes scenarios where more (or fewer) queues were
- * requested than the frontend provided details for.
- *
- * Mapping of packets to queues is considered to be a function of the
- * transmitting system (backend or frontend) and is not negotiated
- * between the two. Guests are free to transmit packets on any queue
- * they choose, provided it has been set up correctly. Guests must be
- * prepared to receive packets on any queue they have requested be set up.
- */
-
-/*
- * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
- * offload off or on. If it is missing then the feature is assumed to be on.
- * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
- * offload on or off. If it is missing then the feature is assumed to be off.
- */
-
-/*
- * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
- * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
- * frontends nor backends are assumed to be capable unless the flags are
- * present.
- */
-
-/*
- * "feature-multicast-control" and "feature-dynamic-multicast-control"
- * advertise the capability to filter ethernet multicast packets in the
- * backend. If the frontend wishes to take advantage of this feature then
- * it may set "request-multicast-control". If the backend only advertises
- * "feature-multicast-control" then "request-multicast-control" must be set
- * before the frontend moves into the connected state. The backend will
- * sample the value on this state transition and any subsequent change in
- * value will have no effect. However, if the backend also advertises
- * "feature-dynamic-multicast-control" then "request-multicast-control"
- * may be set by the frontend at any time. In this case, the backend will
- * watch the value and re-sample on watch events.
- *
- * If the sampled value of "request-multicast-control" is set then the
- * backend transmit side should no longer flood multicast packets to the
- * frontend, it should instead drop any multicast packet that does not
- * match in a filter list.
- * The list is amended by the frontend by sending dummy transmit requests
- * containing XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as
- * specified below.
- * Note that the filter list may be amended even if the sampled value of
- * "request-multicast-control" is not set, however the filter should only
- * be applied if it is set.
- */
-
-/*
- * Control ring
- * ============
- *
- * Some features, such as hashing (detailed below), require a
- * significant amount of out-of-band data to be passed from frontend to
- * backend. Use of xenstore is not suitable for large quantities of data
- * because of quota limitations and so a dedicated 'control ring' is used.
- * The ability of the backend to use a control ring is advertised by
- * setting:
- *
- * /local/domain/X/backend/<domid>/<vif>/feature-ctrl-ring = "1"
- *
- * The frontend provides a control ring to the backend by setting:
- *
- * /local/domain/<domid>/device/vif/<vif>/ctrl-ring-ref = <gref>
- * /local/domain/<domid>/device/vif/<vif>/event-channel-ctrl = <port>
- *
- * where <gref> is the grant reference of the shared page used to
- * implement the control ring and <port> is an event channel to be used
- * as a mailbox interrupt. These keys must be set before the frontend
- * moves into the connected state.
- *
- * The control ring uses a fixed request/response message size and is
- * balanced (i.e. one request to one response), so operationally it is much
- * the same as a transmit or receive ring.
- * Note that there is no requirement that responses are issued in the same
- * order as requests.
- */
-
-/*
- * Hash types
- * ==========
- *
- * For the purposes of the definitions below, 'Packet[]' is an array of
- * octets containing an IP packet without options, 'Array[X..Y]' means a
- * sub-array of 'Array' containing bytes X thru Y inclusive, and '+' is
- * used to indicate concatenation of arrays.
- */
-
-/*
- * A hash calculated over an IP version 4 header as follows:
- *
- * Buffer[0..8] = Packet[12..15] (source address) +
- *                Packet[16..19] (destination address)
- *
- * Result = Hash(Buffer, 8)
- */
-#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4 0
-#define XEN_NETIF_CTRL_HASH_TYPE_IPV4 \
-    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4)
-
-/*
- * A hash calculated over an IP version 4 header and TCP header as
- * follows:
- *
- * Buffer[0..12] = Packet[12..15] (source address) +
- *                 Packet[16..19] (destination address) +
- *                 Packet[20..21] (source port) +
- *                 Packet[22..23] (destination port)
- *
- * Result = Hash(Buffer, 12)
- */
-#define _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP 1
-#define XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP \
-    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV4_TCP)
-
-/*
- * A hash calculated over an IP version 6 header as follows:
- *
- * Buffer[0..32] = Packet[8..23]  (source address ) +
- *                 Packet[24..39] (destination address)
- *
- * Result = Hash(Buffer, 32)
- */
-#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6 2
-#define XEN_NETIF_CTRL_HASH_TYPE_IPV6 \
-    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6)
-
-/*
- * A hash calculated over an IP version 6 header and TCP header as
- * follows:
- *
- * Buffer[0..36] = Packet[8..23]  (source address) +
- *                 Packet[24..39] (destination address) +
- *                 Packet[40..41] (source port) +
- *                 Packet[42..43] (destination port)
- *
- * Result = Hash(Buffer, 36)
- */
-#define _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP 3
-#define XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP \
-    (1 << _XEN_NETIF_CTRL_HASH_TYPE_IPV6_TCP)
-
-/*
- * Hash algorithms
- * ===============
- */
-
-#define XEN_NETIF_CTRL_HASH_ALGORITHM_NONE 0
-
-/*
- * Toeplitz hash:
- */
-
-#define XEN_NETIF_CTRL_HASH_ALGORITHM_TOEPLITZ 1
-
-/*
- * Control requests (struct xen_netif_ctrl_request)
- * ================================================
- *
- * All requests have the following format:
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |    id     |   type    |         data[0]       |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |         data[1]       |         data[2]       |
- * +-----+-----+-----+-----+-----------------------+
- *
- * id: the request identifier, echoed in response.
- * type: the type of request (see below)
- * data[]: any data associated with the request (determined by type)
- */
-
-struct xen_netif_ctrl_request {
-    uint16_t id;
-    uint16_t type;
-
-#define XEN_NETIF_CTRL_TYPE_INVALID               0
-#define XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS        1
-#define XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS        2
-#define XEN_NETIF_CTRL_TYPE_SET_HASH_KEY          3
-#define XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE 4
-#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE 5
-#define XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING      6
-#define XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM    7
-#define XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE 8
-#define XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING      9
-#define XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING     10
-
-    uint32_t data[3];
-};
-
-/*
- * Control responses (struct xen_netif_ctrl_response)
- * ==================================================
- *
- * All responses have the following format:
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |    id     |   type    |         status        |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |         data          |
- * +-----+-----+-----+-----+
- *
- * id: the corresponding request identifier
- * type: the type of the corresponding request
- * status: the status of request processing
- * data: any data associated with the response (determined by type and
- *       status)
- */
-
-struct xen_netif_ctrl_response {
-    uint16_t id;
-    uint16_t type;
-    uint32_t status;
-
-#define XEN_NETIF_CTRL_STATUS_SUCCESS           0
-#define XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     1
-#define XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER 2
-#define XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   3
-
-    uint32_t data;
-};
-
-/*
- * Static Grants (struct xen_netif_gref)
- * =====================================
- *
- * A frontend may provide a fixed set of grant references to be mapped on
- * the backend. The message of type XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
- * prior its usage in the command ring allows for creation of these mappings.
- * The backend will maintain a fixed amount of these mappings.
- *
- * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE lets a frontend query how many
- * of these mappings can be kept.
- *
- * Each entry in the XEN_NETIF_CTRL_TYPE_{ADD,DEL}_GREF_MAPPING input table has
- * the following format:
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | grant ref             |  flags    |  status   |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- *
- * grant ref: grant reference (IN)
- * flags: flags describing the control operation (IN)
- * status: XEN_NETIF_CTRL_STATUS_* (OUT)
- *
- * 'status' is an output parameter which does not require to be set to zero
- * prior to its usage in the corresponding control messages.
- */
-
-struct xen_netif_gref {
-       grant_ref_t ref;
-       uint16_t flags;
-
-#define _XEN_NETIF_CTRLF_GREF_readonly    0
-#define XEN_NETIF_CTRLF_GREF_readonly    (1U<<_XEN_NETIF_CTRLF_GREF_readonly)
-
-       uint16_t status;
-};
-
-/*
- * Control messages
- * ================
- *
- * XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
- * --------------------------------------
- *
- * This is sent by the frontend to set the desired hash algorithm.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_ALGORITHM
- *  data[0] = a XEN_NETIF_CTRL_HASH_ALGORITHM_* value
- *  data[1] = 0
- *  data[2] = 0
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The algorithm is not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
- *
- * NOTE: Setting data[0] to XEN_NETIF_CTRL_HASH_ALGORITHM_NONE disables
- *       hashing and the backend is free to choose how it steers packets
- *       to queues (which is the default behaviour).
- *
- * XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
- * ----------------------------------
- *
- * This is sent by the frontend to query the types of hash supported by
- * the backend.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_FLAGS
- *  data[0] = 0
- *  data[1] = 0
- *  data[2] = 0
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
- *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
- *  data   = supported hash types (if operation was successful)
- *
- * NOTE: A valid hash algorithm must be selected before this operation can
- *       succeed.
- *
- * XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
- * ----------------------------------
- *
- * This is sent by the frontend to set the types of hash that the backend
- * should calculate. (See above for hash type definitions).
- * Note that the 'maximal' type of hash should always be chosen. For
- * example, if the frontend sets both IPV4 and IPV4_TCP hash types then
- * the latter hash type should be calculated for any TCP packet and the
- * former only calculated for non-TCP packets.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_FLAGS
- *  data[0] = bitwise OR of XEN_NETIF_CTRL_HASH_TYPE_* values
- *  data[1] = 0
- *  data[2] = 0
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - One or more flag
- *                                                     value is invalid or
- *                                                     unsupported
- *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
- *  data   = 0
- *
- * NOTE: A valid hash algorithm must be selected before this operation can
- *       succeed.
- *       Also, setting data[0] to zero disables hashing and the backend
- *       is free to choose how it steers packets to queues.
- *
- * XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
- * --------------------------------
- *
- * This is sent by the frontend to set the key of the hash if the algorithm
- * requires it. (See hash algorithms above).
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_KEY
- *  data[0] = grant reference of page containing the key (assumed to
- *            start at beginning of grant)
- *  data[1] = size of key in octets
- *  data[2] = 0
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Key size is invalid
- *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Key size is larger
- *                                                     than the backend
- *                                                     supports
- *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
- *  data   = 0
- *
- * NOTE: Any key octets not specified are assumed to be zero (the key
- *       is assumed to be empty by default) and specifying a new key
- *       invalidates any previous key, hence specifying a key size of
- *       zero will clear the key (which ensures that the calculated hash
- *       will always be zero).
- *       The maximum size of key is algorithm and backend specific, but
- *       is also limited by the single grant reference.
- *       The grant reference may be read-only and must remain valid until
- *       the response has been processed.
- *
- * XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
- * -----------------------------------------
- *
- * This is sent by the frontend to query the maximum size of mapping
- * table supported by the backend. The size is specified in terms of
- * table entries.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_GET_HASH_MAPPING_SIZE
- *  data[0] = 0
- *  data[1] = 0
- *  data[2] = 0
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED - Operation not supported
- *           XEN_NETIF_CTRL_STATUS_SUCCESS       - Operation successful
- *  data   = maximum number of entries allowed in the mapping table
- *           (if operation was successful) or zero if a mapping table is
- *           not supported (i.e. hash mapping is done only by modular
- *           arithmetic).
- *
- * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
- * -------------------------------------
- *
- * This is sent by the frontend to set the actual size of the mapping
- * table to be used by the backend. The size is specified in terms of
- * table entries.
- * Any previous table is invalidated by this message and any new table
- * is assumed to be zero filled.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
- *  data[0] = number of entries in mapping table
- *  data[1] = 0
- *  data[2] = 0
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size is invalid
- *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
- *  data   = 0
- *
- * NOTE: Setting data[0] to 0 means that hash mapping should be done
- *       using modular arithmetic.
- *
- * XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
- * ------------------------------------
- *
- * This is sent by the frontend to set the content of the table mapping
- * hash value to queue number. The backend should calculate the hash from
- * the packet header, use it as an index into the table (modulo the size
- * of the table) and then steer the packet to the queue number found at
- * that index.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING
- *  data[0] = grant reference of page containing the mapping (sub-)table
- *            (assumed to start at beginning of grant)
- *  data[1] = size of (sub-)table in entries
- *  data[2] = offset, in entries, of sub-table within overall table
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Table size or content
- *                                                     is invalid
- *           XEN_NETIF_CTRL_STATUS_BUFFER_OVERFLOW   - Table size is larger
- *                                                     than the backend
- *                                                     supports
- *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
- *  data   = 0
- *
- * NOTE: The overall table has the following format:
- *
- *          0     1     2     3     4     5     6     7  octet
- *       +-----+-----+-----+-----+-----+-----+-----+-----+
- *       |       mapping[0]      |       mapping[1]      |
- *       +-----+-----+-----+-----+-----+-----+-----+-----+
- *       |                       .                       |
- *       |                       .                       |
- *       |                       .                       |
- *       +-----+-----+-----+-----+-----+-----+-----+-----+
- *       |      mapping[N-2]     |      mapping[N-1]     |
- *       +-----+-----+-----+-----+-----+-----+-----+-----+
- *
- *       where N is specified by a XEN_NETIF_CTRL_TYPE_SET_HASH_MAPPING_SIZE
- *       message and each  mapping must specifies a queue between 0 and
- *       "multi-queue-num-queues" (see above).
- *       The backend may support a mapping table larger than can be
- *       mapped by a single grant reference. Thus sub-tables within a
- *       larger table can be individually set by sending multiple messages
- *       with differing offset values. Specifying a new sub-table does not
- *       invalidate any table data outside that range.
- *       The grant reference may be read-only and must remain valid until
- *       the response has been processed.
- *
- * XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
- * -----------------------------------------
- *
- * This is sent by the frontend to fetch the number of grefs that can be kept
- * mapped in the backend.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_GET_GREF_MAPPING_SIZE
- *  data[0] = queue index (assumed 0 for single queue)
- *  data[1] = 0
- *  data[2] = 0
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - The queue index is
- *                                                     out of range
- *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
- *  data   = maximum number of entries allowed in the gref mapping table
- *           (if operation was successful) or zero if it is not supported.
- *
- * XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
- * ------------------------------------
- *
- * This is sent by the frontend for backend to map a list of grant
- * references.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
- *  data[0] = queue index
- *  data[1] = grant reference of page containing the mapping list
- *            (r/w and assumed to start at beginning of page)
- *  data[2] = size of list in entries
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
- *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
- *
- * NOTE: Each entry in the input table has the format outlined
- *       in struct xen_netif_gref.
- *       Contrary to XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING, the struct
- *       xen_netif_gref 'status' field is not used and therefore the response
- *       'status' determines the success of this operation. In case of
- *       failure none of grants mappings get added in the backend.
- *
- * XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
- * ------------------------------------
- *
- * This is sent by the frontend for backend to unmap a list of grant
- * references.
- *
- * Request:
- *
- *  type    = XEN_NETIF_CTRL_TYPE_DEL_GREF_MAPPING
- *  data[0] = queue index
- *  data[1] = grant reference of page containing the mapping list
- *            (r/w and assumed to start at beginning of page)
- *  data[2] = size of list in entries
- *
- * Response:
- *
- *  status = XEN_NETIF_CTRL_STATUS_NOT_SUPPORTED     - Operation not
- *                                                     supported
- *           XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER - Operation failed
- *           XEN_NETIF_CTRL_STATUS_SUCCESS           - Operation successful
- *  data   = number of entries that were unmapped
- *
- * NOTE: Each entry in the input table has the format outlined in struct
- *       xen_netif_gref.
- *       The struct xen_netif_gref 'status' field determines if the entry
- *       was successfully removed.
- *       The entries used are only the ones representing grant references that
- *       were previously the subject of a XEN_NETIF_CTRL_TYPE_ADD_GREF_MAPPING
- *       operation. Any other entries will have their status set to
- *       XEN_NETIF_CTRL_STATUS_INVALID_PARAMETER upon completion.
- */
-
-DEFINE_RING_TYPES(xen_netif_ctrl,
-                  struct xen_netif_ctrl_request,
-                  struct xen_netif_ctrl_response);
-
-/*
- * Guest transmit
- * ==============
- *
- * This is the 'wire' format for transmit (frontend -> backend) packets:
- *
- *  Fragment 1: netif_tx_request_t  - flags = NETTXF_*
- *                                    size = total packet size
- * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
- *                                     NETTXF_extra_info)
- *  ...
- * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
- *                                     XEN_NETIF_EXTRA_MORE)
- *  ...
- *  Fragment N: netif_tx_request_t  - (only if fragment N-1 flags include
- *                                     NETTXF_more_data - flags on preceding
- *                                     extras are not relevant here)
- *                                    flags = 0
- *                                    size = fragment size
- *
- * NOTE:
- *
- * This format slightly is different from that used for receive
- * (backend -> frontend) packets. Specifically, in a multi-fragment
- * packet the actual size of fragment 1 can only be determined by
- * subtracting the sizes of fragments 2..N from the total packet size.
- *
- * Ring slot size is 12 octets, however not all request/response
- * structs use the full size.
- *
- * tx request data (netif_tx_request_t)
- * ------------------------------------
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | grant ref             | offset    | flags     |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | id        | size      |
- * +-----+-----+-----+-----+
- *
- * grant ref: Reference to buffer page.
- * offset: Offset within buffer page.
- * flags: NETTXF_*.
- * id: request identifier, echoed in response.
- * size: packet size in bytes.
- *
- * tx response (netif_tx_response_t)
- * ---------------------------------
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | id        | status    | unused                |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | unused                |
- * +-----+-----+-----+-----+
- *
- * id: reflects id in transmit request
- * status: NETIF_RSP_*
- *
- * Guest receive
- * =============
- *
- * This is the 'wire' format for receive (backend -> frontend) packets:
- *
- *  Fragment 1: netif_rx_request_t  - flags = NETRXF_*
- *                                    size = fragment size
- * [Extra 1: netif_extra_info_t]    - (only if fragment 1 flags include
- *                                     NETRXF_extra_info)
- *  ...
- * [Extra N: netif_extra_info_t]    - (only if extra N-1 flags include
- *                                     XEN_NETIF_EXTRA_MORE)
- *  ...
- *  Fragment N: netif_rx_request_t  - (only if fragment N-1 flags include
- *                                     NETRXF_more_data - flags on preceding
- *                                     extras are not relevant here)
- *                                    flags = 0
- *                                    size = fragment size
- *
- * NOTE:
- *
- * This format slightly is different from that used for transmit
- * (frontend -> backend) packets. Specifically, in a multi-fragment
- * packet the size of the packet can only be determined by summing the
- * sizes of fragments 1..N.
- *
- * Ring slot size is 8 octets.
- *
- * rx request (netif_rx_request_t)
- * -------------------------------
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | id        | pad       | gref                  |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- *
- * id: request identifier, echoed in response.
- * gref: reference to incoming granted frame.
- *
- * rx response (netif_rx_response_t)
- * ---------------------------------
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | id        | offset    | flags     | status    |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- *
- * id: reflects id in receive request
- * offset: offset in page of start of received packet
- * flags: NETRXF_*
- * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size.
- *
- * NOTE: Historically, to support GSO on the frontend receive side, Linux
- *       netfront does not make use of the rx response id (because, as
- *       described below, extra info structures overlay the id field).
- *       Instead it assumes that responses always appear in the same ring
- *       slot as their corresponding request. Thus, to maintain
- *       compatibility, backends must make sure this is the case.
- *
- * Extra Info
- * ==========
- *
- * Can be present if initial request or response has NET{T,R}XF_extra_info,
- * or previous extra request has XEN_NETIF_EXTRA_MORE.
- *
- * The struct therefore needs to fit into either a tx or rx slot and
- * is therefore limited to 8 octets.
- *
- * NOTE: Because extra info data overlays the usual request/response
- *       structures, there is no id information in the opposite direction.
- *       So, if an extra info overlays an rx response the frontend can
- *       assume that it is in the same ring slot as the request that was
- *       consumed to make the slot available, and the backend must ensure
- *       this assumption is true.
- *
- * extra info (netif_extra_info_t)
- * -------------------------------
- *
- * General format:
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |type |flags| type specific data                |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * | padding for tx        |
- * +-----+-----+-----+-----+
- *
- * type: XEN_NETIF_EXTRA_TYPE_*
- * flags: XEN_NETIF_EXTRA_FLAG_*
- * padding for tx: present only in the tx case due to 8 octet limit
- *                 from rx case. Not shown in type specific entries
- *                 below.
- *
- * XEN_NETIF_EXTRA_TYPE_GSO:
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |type |flags| size      |type | pad | features  |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- *
- * type: Must be XEN_NETIF_EXTRA_TYPE_GSO
- * flags: XEN_NETIF_EXTRA_FLAG_*
- * size: Maximum payload size of each segment. For example,
- *       for TCP this is just the path MSS.
- * type: XEN_NETIF_GSO_TYPE_*: This determines the protocol of
- *       the packet and any extra features required to segment the
- *       packet properly.
- * features: EN_NETIF_GSO_FEAT_*: This specifies any extra GSO
- *           features required to process this packet, such as ECN
- *           support for TCPv4.
- *
- * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |type |flags| addr                              |
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- *
- * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}
- * flags: XEN_NETIF_EXTRA_FLAG_*
- * addr: address to add/remove
- *
- * XEN_NETIF_EXTRA_TYPE_HASH:
- *
- * A backend that supports teoplitz hashing is assumed to accept
- * this type of extra info in transmit packets.
- * A frontend that enables hashing is assumed to accept
- * this type of extra info in receive packets.
- *
- *    0     1     2     3     4     5     6     7  octet
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- * |type |flags|htype| alg |LSB ---- value ---- MSB|
- * +-----+-----+-----+-----+-----+-----+-----+-----+
- *
- * type: Must be XEN_NETIF_EXTRA_TYPE_HASH
- * flags: XEN_NETIF_EXTRA_FLAG_*
- * htype: Hash type (one of _XEN_NETIF_CTRL_HASH_TYPE_* - see above)
- * alg: The algorithm used to calculate the hash (one of
- *      XEN_NETIF_CTRL_HASH_TYPE_ALGORITHM_* - see above)
- * value: Hash value
- */
-
-/* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETTXF_csum_blank     (0)
-#define  NETTXF_csum_blank     (1U<<_NETTXF_csum_blank)
-
-/* Packet data has been validated against protocol checksum. */
-#define _NETTXF_data_validated (1)
-#define  NETTXF_data_validated (1U<<_NETTXF_data_validated)
-
-/* Packet continues in the next request descriptor. */
-#define _NETTXF_more_data      (2)
-#define  NETTXF_more_data      (1U<<_NETTXF_more_data)
-
-/* Packet to be followed by extra descriptor(s). */
-#define _NETTXF_extra_info     (3)
-#define  NETTXF_extra_info     (1U<<_NETTXF_extra_info)
-
-#define XEN_NETIF_MAX_TX_SIZE 0xFFFF
-struct netif_tx_request {
-    grant_ref_t gref;
-    uint16_t offset;
-    uint16_t flags;
-    uint16_t id;
-    uint16_t size;
-};
-typedef struct netif_tx_request netif_tx_request_t;
-
-/* Types of netif_extra_info descriptors. */
-#define XEN_NETIF_EXTRA_TYPE_NONE      (0)  /* Never used - invalid */
-#define XEN_NETIF_EXTRA_TYPE_GSO       (1)  /* u.gso */
-#define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2)  /* u.mcast */
-#define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3)  /* u.mcast */
-#define XEN_NETIF_EXTRA_TYPE_HASH      (4)  /* u.hash */
-#define XEN_NETIF_EXTRA_TYPE_MAX       (5)
-
-/* netif_extra_info_t flags. */
-#define _XEN_NETIF_EXTRA_FLAG_MORE (0)
-#define XEN_NETIF_EXTRA_FLAG_MORE  (1U<<_XEN_NETIF_EXTRA_FLAG_MORE)
-
-/* GSO types */
-#define XEN_NETIF_GSO_TYPE_NONE         (0)
-#define XEN_NETIF_GSO_TYPE_TCPV4        (1)
-#define XEN_NETIF_GSO_TYPE_TCPV6        (2)
-
-/*
- * This structure needs to fit within both netif_tx_request_t and
- * netif_rx_response_t for compatibility.
- */
-struct netif_extra_info {
-    uint8_t type;
-    uint8_t flags;
-    union {
-        struct {
-            uint16_t size;
-            uint8_t type;
-            uint8_t pad;
-            uint16_t features;
-        } gso;
-        struct {
-            uint8_t addr[6];
-        } mcast;
-        struct {
-            uint8_t type;
-            uint8_t algorithm;
-            uint8_t value[4];
-        } hash;
-        uint16_t pad[3];
-    } u;
-};
-typedef struct netif_extra_info netif_extra_info_t;
-
-struct netif_tx_response {
-    uint16_t id;
-    int16_t  status;
-};
-typedef struct netif_tx_response netif_tx_response_t;
-
-struct netif_rx_request {
-    uint16_t    id;        /* Echoed in response message.        */
-    uint16_t    pad;
-    grant_ref_t gref;
-};
-typedef struct netif_rx_request netif_rx_request_t;
-
-/* Packet data has been validated against protocol checksum. */
-#define _NETRXF_data_validated (0)
-#define  NETRXF_data_validated (1U<<_NETRXF_data_validated)
-
-/* Protocol checksum field is blank in the packet (hardware offload)? */
-#define _NETRXF_csum_blank     (1)
-#define  NETRXF_csum_blank     (1U<<_NETRXF_csum_blank)
-
-/* Packet continues in the next request descriptor. */
-#define _NETRXF_more_data      (2)
-#define  NETRXF_more_data      (1U<<_NETRXF_more_data)
-
-/* Packet to be followed by extra descriptor(s). */
-#define _NETRXF_extra_info     (3)
-#define  NETRXF_extra_info     (1U<<_NETRXF_extra_info)
-
-/* Packet has GSO prefix. Deprecated but included for compatibility */
-#define _NETRXF_gso_prefix     (4)
-#define  NETRXF_gso_prefix     (1U<<_NETRXF_gso_prefix)
-
-struct netif_rx_response {
-    uint16_t id;
-    uint16_t offset;
-    uint16_t flags;
-    int16_t  status;
-};
-typedef struct netif_rx_response netif_rx_response_t;
-
-/*
- * Generate netif ring structures and types.
- */
-
-DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response);
-DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response);
-
-#define NETIF_RSP_DROPPED         -2
-#define NETIF_RSP_ERROR           -1
-#define NETIF_RSP_OKAY             0
-/* No response: used for auxiliary requests (e.g., netif_extra_info_t). */
-#define NETIF_RSP_NULL             1
-
-#endif
diff --git a/include/hw/xen/interface/io/protocols.h b/include/hw/xen/interface/io/protocols.h
deleted file mode 100644
index 52b4de0f81..0000000000
--- a/include/hw/xen/interface/io/protocols.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/******************************************************************************
- * protocols.h
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright (c) 2008, Keir Fraser
- */
-
-#ifndef __XEN_PROTOCOLS_H__
-#define __XEN_PROTOCOLS_H__
-
-#define XEN_IO_PROTO_ABI_X86_32     "x86_32-abi"
-#define XEN_IO_PROTO_ABI_X86_64     "x86_64-abi"
-#define XEN_IO_PROTO_ABI_ARM        "arm-abi"
-
-#if defined(__i386__)
-# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_32
-#elif defined(__x86_64__)
-# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_X86_64
-#elif defined(__arm__) || defined(__aarch64__)
-# define XEN_IO_PROTO_ABI_NATIVE XEN_IO_PROTO_ABI_ARM
-#else
-# error arch fixup needed here
-#endif
-
-#endif
diff --git a/include/hw/xen/interface/io/ring.h b/include/hw/xen/interface/io/ring.h
deleted file mode 100644
index 115705f3f4..0000000000
--- a/include/hw/xen/interface/io/ring.h
+++ /dev/null
@@ -1,474 +0,0 @@
-/******************************************************************************
- * ring.h
- * 
- * Shared producer-consumer ring macros.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Tim Deegan and Andrew Warfield November 2004.
- */
-
-#ifndef __XEN_PUBLIC_IO_RING_H__
-#define __XEN_PUBLIC_IO_RING_H__
-
-/*
- * When #include'ing this header, you need to provide the following
- * declaration upfront:
- * - standard integers types (uint8_t, uint16_t, etc)
- * They are provided by stdint.h of the standard headers.
- *
- * Before using the different macros, you need to provide the following
- * macros:
- * - xen_mb()  a memory barrier
- * - xen_rmb() a read memory barrier
- * - xen_wmb() a write memory barrier
- * Example of those can be found in xenctrl.h.
- *
- * In addition, if you intend to use the FLEX macros, you also need to
- * provide the following, before invoking the FLEX macros:
- * - size_t
- * - memcpy
- * - grant_ref_t
- * These declarations are provided by string.h of the standard headers,
- * and grant_table.h from the Xen public headers.
- */
-
-typedef unsigned int RING_IDX;
-
-/* Round a 32-bit unsigned constant down to the nearest power of two. */
-#define __RD2(_x)  (((_x) & 0x00000002) ? 0x2                  : ((_x) & 0x1))
-#define __RD4(_x)  (((_x) & 0x0000000c) ? __RD2((_x)>>2)<<2    : __RD2(_x))
-#define __RD8(_x)  (((_x) & 0x000000f0) ? __RD4((_x)>>4)<<4    : __RD4(_x))
-#define __RD16(_x) (((_x) & 0x0000ff00) ? __RD8((_x)>>8)<<8    : __RD8(_x))
-#define __RD32(_x) (((_x) & 0xffff0000) ? __RD16((_x)>>16)<<16 : __RD16(_x))
-
-/*
- * Calculate size of a shared ring, given the total available space for the
- * ring and indexes (_sz), and the name tag of the request/response structure.
- * A ring contains as many entries as will fit, rounded down to the nearest 
- * power of two (so we can mask with (size-1) to loop around).
- */
-#define __CONST_RING_SIZE(_s, _sz) \
-    (__RD32(((_sz) - offsetof(struct _s##_sring, ring)) / \
-            sizeof_field(struct _s##_sring, ring[0])))
-/*
- * The same for passing in an actual pointer instead of a name tag.
- */
-#define __RING_SIZE(_s, _sz) \
-    (__RD32(((_sz) - (long)(_s)->ring + (long)(_s)) / sizeof((_s)->ring[0])))
-
-/*
- * Macros to make the correct C datatypes for a new kind of ring.
- * 
- * To make a new ring datatype, you need to have two message structures,
- * let's say request_t, and response_t already defined.
- *
- * In a header where you want the ring datatype declared, you then do:
- *
- *     DEFINE_RING_TYPES(mytag, request_t, response_t);
- *
- * These expand out to give you a set of types, as you can see below.
- * The most important of these are:
- * 
- *     mytag_sring_t      - The shared ring.
- *     mytag_front_ring_t - The 'front' half of the ring.
- *     mytag_back_ring_t  - The 'back' half of the ring.
- *
- * To initialize a ring in your code you need to know the location and size
- * of the shared memory area (PAGE_SIZE, for instance). To initialise
- * the front half:
- *
- *     mytag_front_ring_t front_ring;
- *     SHARED_RING_INIT((mytag_sring_t *)shared_page);
- *     FRONT_RING_INIT(&front_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
- *
- * Initializing the back follows similarly (note that only the front
- * initializes the shared ring):
- *
- *     mytag_back_ring_t back_ring;
- *     BACK_RING_INIT(&back_ring, (mytag_sring_t *)shared_page, PAGE_SIZE);
- */
-
-#define DEFINE_RING_TYPES(__name, __req_t, __rsp_t)                     \
-                                                                        \
-/* Shared ring entry */                                                 \
-union __name##_sring_entry {                                            \
-    __req_t req;                                                        \
-    __rsp_t rsp;                                                        \
-};                                                                      \
-                                                                        \
-/* Shared ring page */                                                  \
-struct __name##_sring {                                                 \
-    RING_IDX req_prod, req_event;                                       \
-    RING_IDX rsp_prod, rsp_event;                                       \
-    union {                                                             \
-        struct {                                                        \
-            uint8_t smartpoll_active;                                   \
-        } netif;                                                        \
-        struct {                                                        \
-            uint8_t msg;                                                \
-        } tapif_user;                                                   \
-        uint8_t pvt_pad[4];                                             \
-    } pvt;                                                              \
-    uint8_t __pad[44];                                                  \
-    union __name##_sring_entry ring[1]; /* variable-length */           \
-};                                                                      \
-                                                                        \
-/* "Front" end's private variables */                                   \
-struct __name##_front_ring {                                            \
-    RING_IDX req_prod_pvt;                                              \
-    RING_IDX rsp_cons;                                                  \
-    unsigned int nr_ents;                                               \
-    struct __name##_sring *sring;                                       \
-};                                                                      \
-                                                                        \
-/* "Back" end's private variables */                                    \
-struct __name##_back_ring {                                             \
-    RING_IDX rsp_prod_pvt;                                              \
-    RING_IDX req_cons;                                                  \
-    unsigned int nr_ents;                                               \
-    struct __name##_sring *sring;                                       \
-};                                                                      \
-                                                                        \
-/* Syntactic sugar */                                                   \
-typedef struct __name##_sring __name##_sring_t;                         \
-typedef struct __name##_front_ring __name##_front_ring_t;               \
-typedef struct __name##_back_ring __name##_back_ring_t
-
-/*
- * Macros for manipulating rings.
- * 
- * FRONT_RING_whatever works on the "front end" of a ring: here 
- * requests are pushed on to the ring and responses taken off it.
- * 
- * BACK_RING_whatever works on the "back end" of a ring: here 
- * requests are taken off the ring and responses put on.
- * 
- * N.B. these macros do NO INTERLOCKS OR FLOW CONTROL. 
- * This is OK in 1-for-1 request-response situations where the 
- * requestor (front end) never has more than RING_SIZE()-1
- * outstanding requests.
- */
-
-/* Initialising empty rings */
-#define SHARED_RING_INIT(_s) do {                                       \
-    (_s)->req_prod  = (_s)->rsp_prod  = 0;                              \
-    (_s)->req_event = (_s)->rsp_event = 1;                              \
-    (void)memset((_s)->pvt.pvt_pad, 0, sizeof((_s)->pvt.pvt_pad));      \
-    (void)memset((_s)->__pad, 0, sizeof((_s)->__pad));                  \
-} while(0)
-
-#define FRONT_RING_INIT(_r, _s, __size) do {                            \
-    (_r)->req_prod_pvt = 0;                                             \
-    (_r)->rsp_cons = 0;                                                 \
-    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
-    (_r)->sring = (_s);                                                 \
-} while (0)
-
-#define BACK_RING_INIT(_r, _s, __size) do {                             \
-    (_r)->rsp_prod_pvt = 0;                                             \
-    (_r)->req_cons = 0;                                                 \
-    (_r)->nr_ents = __RING_SIZE(_s, __size);                            \
-    (_r)->sring = (_s);                                                 \
-} while (0)
-
-/* How big is this ring? */
-#define RING_SIZE(_r)                                                   \
-    ((_r)->nr_ents)
-
-/* Number of free requests (for use on front side only). */
-#define RING_FREE_REQUESTS(_r)                                          \
-    (RING_SIZE(_r) - ((_r)->req_prod_pvt - (_r)->rsp_cons))
-
-/* Test if there is an empty slot available on the front ring.
- * (This is only meaningful from the front. )
- */
-#define RING_FULL(_r)                                                   \
-    (RING_FREE_REQUESTS(_r) == 0)
-
-/* Test if there are outstanding messages to be processed on a ring. */
-#define RING_HAS_UNCONSUMED_RESPONSES(_r)                               \
-    ((_r)->sring->rsp_prod - (_r)->rsp_cons)
-
-#define RING_HAS_UNCONSUMED_REQUESTS(_r) ({                             \
-    unsigned int req = (_r)->sring->req_prod - (_r)->req_cons;          \
-    unsigned int rsp = RING_SIZE(_r) -                                  \
-        ((_r)->req_cons - (_r)->rsp_prod_pvt);                          \
-    req < rsp ? req : rsp;                                              \
-})
-
-/* Direct access to individual ring elements, by index. */
-#define RING_GET_REQUEST(_r, _idx)                                      \
-    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].req))
-
-/*
- * Get a local copy of a request.
- *
- * Use this in preference to RING_GET_REQUEST() so all processing is
- * done on a local copy that cannot be modified by the other end.
- *
- * Note that https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 may cause this
- * to be ineffective where _req is a struct which consists of only bitfields.
- */
-#define RING_COPY_REQUEST(_r, _idx, _req) do {				\
-        /* Use volatile to force the copy into _req. */			\
-        *(_req) = *(volatile typeof(_req))RING_GET_REQUEST(_r, _idx);	\
-} while (0)
-
-#define RING_GET_RESPONSE(_r, _idx)                                     \
-    (&((_r)->sring->ring[((_idx) & (RING_SIZE(_r) - 1))].rsp))
-
-/* Loop termination condition: Would the specified index overflow the ring? */
-#define RING_REQUEST_CONS_OVERFLOW(_r, _cons)                           \
-    (((_cons) - (_r)->rsp_prod_pvt) >= RING_SIZE(_r))
-
-/* Ill-behaved frontend determination: Can there be this many requests? */
-#define RING_REQUEST_PROD_OVERFLOW(_r, _prod)                           \
-    (((_prod) - (_r)->rsp_prod_pvt) > RING_SIZE(_r))
-
-#define RING_PUSH_REQUESTS(_r) do {                                     \
-    xen_wmb(); /* back sees requests /before/ updated producer index */ \
-    (_r)->sring->req_prod = (_r)->req_prod_pvt;                         \
-} while (0)
-
-#define RING_PUSH_RESPONSES(_r) do {                                    \
-    xen_wmb(); /* front sees resps /before/ updated producer index */   \
-    (_r)->sring->rsp_prod = (_r)->rsp_prod_pvt;                         \
-} while (0)
-
-/*
- * Notification hold-off (req_event and rsp_event):
- * 
- * When queueing requests or responses on a shared ring, it may not always be
- * necessary to notify the remote end. For example, if requests are in flight
- * in a backend, the front may be able to queue further requests without
- * notifying the back (if the back checks for new requests when it queues
- * responses).
- * 
- * When enqueuing requests or responses:
- * 
- *  Use RING_PUSH_{REQUESTS,RESPONSES}_AND_CHECK_NOTIFY(). The second argument
- *  is a boolean return value. True indicates that the receiver requires an
- *  asynchronous notification.
- * 
- * After dequeuing requests or responses (before sleeping the connection):
- * 
- *  Use RING_FINAL_CHECK_FOR_REQUESTS() or RING_FINAL_CHECK_FOR_RESPONSES().
- *  The second argument is a boolean return value. True indicates that there
- *  are pending messages on the ring (i.e., the connection should not be put
- *  to sleep).
- * 
- *  These macros will set the req_event/rsp_event field to trigger a
- *  notification on the very next message that is enqueued. If you want to
- *  create batches of work (i.e., only receive a notification after several
- *  messages have been enqueued) then you will need to create a customised
- *  version of the FINAL_CHECK macro in your own code, which sets the event
- *  field appropriately.
- */
-
-#define RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(_r, _notify) do {           \
-    RING_IDX __old = (_r)->sring->req_prod;                             \
-    RING_IDX __new = (_r)->req_prod_pvt;                                \
-    xen_wmb(); /* back sees requests /before/ updated producer index */ \
-    (_r)->sring->req_prod = __new;                                      \
-    xen_mb(); /* back sees new requests /before/ we check req_event */  \
-    (_notify) = ((RING_IDX)(__new - (_r)->sring->req_event) <           \
-                 (RING_IDX)(__new - __old));                            \
-} while (0)
-
-#define RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(_r, _notify) do {          \
-    RING_IDX __old = (_r)->sring->rsp_prod;                             \
-    RING_IDX __new = (_r)->rsp_prod_pvt;                                \
-    xen_wmb(); /* front sees resps /before/ updated producer index */   \
-    (_r)->sring->rsp_prod = __new;                                      \
-    xen_mb(); /* front sees new resps /before/ we check rsp_event */    \
-    (_notify) = ((RING_IDX)(__new - (_r)->sring->rsp_event) <           \
-                 (RING_IDX)(__new - __old));                            \
-} while (0)
-
-#define RING_FINAL_CHECK_FOR_REQUESTS(_r, _work_to_do) do {             \
-    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
-    if (_work_to_do) break;                                             \
-    (_r)->sring->req_event = (_r)->req_cons + 1;                        \
-    xen_mb();                                                           \
-    (_work_to_do) = RING_HAS_UNCONSUMED_REQUESTS(_r);                   \
-} while (0)
-
-#define RING_FINAL_CHECK_FOR_RESPONSES(_r, _work_to_do) do {            \
-    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
-    if (_work_to_do) break;                                             \
-    (_r)->sring->rsp_event = (_r)->rsp_cons + 1;                        \
-    xen_mb();                                                           \
-    (_work_to_do) = RING_HAS_UNCONSUMED_RESPONSES(_r);                  \
-} while (0)
-
-
-/*
- * DEFINE_XEN_FLEX_RING_AND_INTF defines two monodirectional rings and
- * functions to check if there is data on the ring, and to read and
- * write to them.
- *
- * DEFINE_XEN_FLEX_RING is similar to DEFINE_XEN_FLEX_RING_AND_INTF, but
- * does not define the indexes page. As different protocols can have
- * extensions to the basic format, this macro allow them to define their
- * own struct.
- *
- * XEN_FLEX_RING_SIZE
- *   Convenience macro to calculate the size of one of the two rings
- *   from the overall order.
- *
- * $NAME_mask
- *   Function to apply the size mask to an index, to reduce the index
- *   within the range [0-size].
- *
- * $NAME_read_packet
- *   Function to read data from the ring. The amount of data to read is
- *   specified by the "size" argument.
- *
- * $NAME_write_packet
- *   Function to write data to the ring. The amount of data to write is
- *   specified by the "size" argument.
- *
- * $NAME_get_ring_ptr
- *   Convenience function that returns a pointer to read/write to the
- *   ring at the right location.
- *
- * $NAME_data_intf
- *   Indexes page, shared between frontend and backend. It also
- *   contains the array of grant refs.
- *
- * $NAME_queued
- *   Function to calculate how many bytes are currently on the ring,
- *   ready to be read. It can also be used to calculate how much free
- *   space is currently on the ring (XEN_FLEX_RING_SIZE() -
- *   $NAME_queued()).
- */
-
-#ifndef XEN_PAGE_SHIFT
-/* The PAGE_SIZE for ring protocols and hypercall interfaces is always
- * 4K, regardless of the architecture, and page granularity chosen by
- * operating systems.
- */
-#define XEN_PAGE_SHIFT 12
-#endif
-#define XEN_FLEX_RING_SIZE(order)                                             \
-    (1UL << ((order) + XEN_PAGE_SHIFT - 1))
-
-#define DEFINE_XEN_FLEX_RING(name)                                            \
-static inline RING_IDX name##_mask(RING_IDX idx, RING_IDX ring_size)          \
-{                                                                             \
-    return idx & (ring_size - 1);                                             \
-}                                                                             \
-                                                                              \
-static inline unsigned char *name##_get_ring_ptr(unsigned char *buf,          \
-                                                 RING_IDX idx,                \
-                                                 RING_IDX ring_size)          \
-{                                                                             \
-    return buf + name##_mask(idx, ring_size);                                 \
-}                                                                             \
-                                                                              \
-static inline void name##_read_packet(void *opaque,                           \
-                                      const unsigned char *buf,               \
-                                      size_t size,                            \
-                                      RING_IDX masked_prod,                   \
-                                      RING_IDX *masked_cons,                  \
-                                      RING_IDX ring_size)                     \
-{                                                                             \
-    if (*masked_cons < masked_prod ||                                         \
-        size <= ring_size - *masked_cons) {                                   \
-        memcpy(opaque, buf + *masked_cons, size);                             \
-    } else {                                                                  \
-        memcpy(opaque, buf + *masked_cons, ring_size - *masked_cons);         \
-        memcpy((unsigned char *)opaque + ring_size - *masked_cons, buf,       \
-               size - (ring_size - *masked_cons));                            \
-    }                                                                         \
-    *masked_cons = name##_mask(*masked_cons + size, ring_size);               \
-}                                                                             \
-                                                                              \
-static inline void name##_write_packet(unsigned char *buf,                    \
-                                       const void *opaque,                    \
-                                       size_t size,                           \
-                                       RING_IDX *masked_prod,                 \
-                                       RING_IDX masked_cons,                  \
-                                       RING_IDX ring_size)                    \
-{                                                                             \
-    if (*masked_prod < masked_cons ||                                         \
-        size <= ring_size - *masked_prod) {                                   \
-        memcpy(buf + *masked_prod, opaque, size);                             \
-    } else {                                                                  \
-        memcpy(buf + *masked_prod, opaque, ring_size - *masked_prod);         \
-        memcpy(buf, (unsigned char *)opaque + (ring_size - *masked_prod),     \
-               size - (ring_size - *masked_prod));                            \
-    }                                                                         \
-    *masked_prod = name##_mask(*masked_prod + size, ring_size);               \
-}                                                                             \
-                                                                              \
-static inline RING_IDX name##_queued(RING_IDX prod,                           \
-                                     RING_IDX cons,                           \
-                                     RING_IDX ring_size)                      \
-{                                                                             \
-    RING_IDX size;                                                            \
-                                                                              \
-    if (prod == cons)                                                         \
-        return 0;                                                             \
-                                                                              \
-    prod = name##_mask(prod, ring_size);                                      \
-    cons = name##_mask(cons, ring_size);                                      \
-                                                                              \
-    if (prod == cons)                                                         \
-        return ring_size;                                                     \
-                                                                              \
-    if (prod > cons)                                                          \
-        size = prod - cons;                                                   \
-    else                                                                      \
-        size = ring_size - (cons - prod);                                     \
-    return size;                                                              \
-}                                                                             \
-                                                                              \
-struct name##_data {                                                          \
-    unsigned char *in; /* half of the allocation */                           \
-    unsigned char *out; /* half of the allocation */                          \
-}
-
-#define DEFINE_XEN_FLEX_RING_AND_INTF(name)                                   \
-struct name##_data_intf {                                                     \
-    RING_IDX in_cons, in_prod;                                                \
-                                                                              \
-    uint8_t pad1[56];                                                         \
-                                                                              \
-    RING_IDX out_cons, out_prod;                                              \
-                                                                              \
-    uint8_t pad2[56];                                                         \
-                                                                              \
-    RING_IDX ring_order;                                                      \
-    grant_ref_t ref[];                                                        \
-};                                                                            \
-DEFINE_XEN_FLEX_RING(name)
-
-#endif /* __XEN_PUBLIC_IO_RING_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
diff --git a/include/hw/xen/interface/io/usbif.h b/include/hw/xen/interface/io/usbif.h
deleted file mode 100644
index c6a58639d6..0000000000
--- a/include/hw/xen/interface/io/usbif.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * usbif.h
- *
- * USB I/O interface for Xen guest OSes.
- *
- * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
- * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef __XEN_PUBLIC_IO_USBIF_H__
-#define __XEN_PUBLIC_IO_USBIF_H__
-
-#include "ring.h"
-#include "../grant_table.h"
-
-/*
- * Feature and Parameter Negotiation
- * =================================
- * The two halves of a Xen pvUSB driver utilize nodes within the XenStore to
- * communicate capabilities and to negotiate operating parameters. This
- * section enumerates these nodes which reside in the respective front and
- * backend portions of the XenStore, following the XenBus convention.
- *
- * Any specified default value is in effect if the corresponding XenBus node
- * is not present in the XenStore.
- *
- * XenStore nodes in sections marked "PRIVATE" are solely for use by the
- * driver side whose XenBus tree contains them.
- *
- *****************************************************************************
- *                            Backend XenBus Nodes
- *****************************************************************************
- *
- *------------------ Backend Device Identification (PRIVATE) ------------------
- *
- * num-ports
- *      Values:         unsigned [1...31]
- *
- *      Number of ports for this (virtual) USB host connector.
- *
- * usb-ver
- *      Values:         unsigned [1...2]
- *
- *      USB version of this host connector: 1 = USB 1.1, 2 = USB 2.0.
- *
- * port/[1...31]
- *      Values:         string
- *
- *      Physical USB device connected to the given port, e.g. "3-1.5".
- *
- *****************************************************************************
- *                            Frontend XenBus Nodes
- *****************************************************************************
- *
- *----------------------- Request Transport Parameters -----------------------
- *
- * event-channel
- *      Values:         unsigned
- *
- *      The identifier of the Xen event channel used to signal activity
- *      in the ring buffer.
- *
- * urb-ring-ref
- *      Values:         unsigned
- *
- *      The Xen grant reference granting permission for the backend to map
- *      the sole page in a single page sized ring buffer. This is the ring
- *      buffer for urb requests.
- *
- * conn-ring-ref
- *      Values:         unsigned
- *
- *      The Xen grant reference granting permission for the backend to map
- *      the sole page in a single page sized ring buffer. This is the ring
- *      buffer for connection/disconnection requests.
- *
- * protocol
- *      Values:         string (XEN_IO_PROTO_ABI_*)
- *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
- *
- *      The machine ABI rules governing the format of all ring request and
- *      response structures.
- *
- */
-
-enum usb_spec_version {
-	USB_VER_UNKNOWN = 0,
-	USB_VER_USB11,
-	USB_VER_USB20,
-	USB_VER_USB30,	/* not supported yet */
-};
-
-/*
- *  USB pipe in usbif_request
- *
- *  - port number:	bits 0-4
- *				(USB_MAXCHILDREN is 31)
- *
- *  - operation flag:	bit 5
- *				(0 = submit urb,
- *				 1 = unlink urb)
- *
- *  - direction:		bit 7
- *				(0 = Host-to-Device [Out]
- *				 1 = Device-to-Host [In])
- *
- *  - device address:	bits 8-14
- *
- *  - endpoint:		bits 15-18
- *
- *  - pipe type:	bits 30-31
- *				(00 = isochronous, 01 = interrupt,
- *				 10 = control, 11 = bulk)
- */
-
-#define USBIF_PIPE_PORT_MASK	0x0000001f
-#define USBIF_PIPE_UNLINK	0x00000020
-#define USBIF_PIPE_DIR		0x00000080
-#define USBIF_PIPE_DEV_MASK	0x0000007f
-#define USBIF_PIPE_DEV_SHIFT	8
-#define USBIF_PIPE_EP_MASK	0x0000000f
-#define USBIF_PIPE_EP_SHIFT	15
-#define USBIF_PIPE_TYPE_MASK	0x00000003
-#define USBIF_PIPE_TYPE_SHIFT	30
-#define USBIF_PIPE_TYPE_ISOC	0
-#define USBIF_PIPE_TYPE_INT	1
-#define USBIF_PIPE_TYPE_CTRL	2
-#define USBIF_PIPE_TYPE_BULK	3
-
-#define usbif_pipeportnum(pipe)			((pipe) & USBIF_PIPE_PORT_MASK)
-#define usbif_setportnum_pipe(pipe, portnum)	((pipe) | (portnum))
-
-#define usbif_pipeunlink(pipe)			((pipe) & USBIF_PIPE_UNLINK)
-#define usbif_pipesubmit(pipe)			(!usbif_pipeunlink(pipe))
-#define usbif_setunlink_pipe(pipe)		((pipe) | USBIF_PIPE_UNLINK)
-
-#define usbif_pipein(pipe)			((pipe) & USBIF_PIPE_DIR)
-#define usbif_pipeout(pipe)			(!usbif_pipein(pipe))
-
-#define usbif_pipedevice(pipe)			\
-		(((pipe) >> USBIF_PIPE_DEV_SHIFT) & USBIF_PIPE_DEV_MASK)
-
-#define usbif_pipeendpoint(pipe)		\
-		(((pipe) >> USBIF_PIPE_EP_SHIFT) & USBIF_PIPE_EP_MASK)
-
-#define usbif_pipetype(pipe)			\
-		(((pipe) >> USBIF_PIPE_TYPE_SHIFT) & USBIF_PIPE_TYPE_MASK)
-#define usbif_pipeisoc(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_ISOC)
-#define usbif_pipeint(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_INT)
-#define usbif_pipectrl(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_CTRL)
-#define usbif_pipebulk(pipe)	(usbif_pipetype(pipe) == USBIF_PIPE_TYPE_BULK)
-
-#define USBIF_MAX_SEGMENTS_PER_REQUEST (16)
-#define USBIF_MAX_PORTNR	31
-#define USBIF_RING_SIZE	4096
-
-/*
- * RING for transferring urbs.
- */
-struct usbif_request_segment {
-	grant_ref_t gref;
-	uint16_t offset;
-	uint16_t length;
-};
-
-struct usbif_urb_request {
-	uint16_t id; /* request id */
-	uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */
-
-	/* basic urb parameter */
-	uint32_t pipe;
-	uint16_t transfer_flags;
-#define USBIF_SHORT_NOT_OK	0x0001
-	uint16_t buffer_length;
-	union {
-		uint8_t ctrl[8]; /* setup_packet (Ctrl) */
-
-		struct {
-			uint16_t interval; /* maximum (1024*8) in usb core */
-			uint16_t start_frame; /* start frame */
-			uint16_t number_of_packets; /* number of ISO packet */
-			uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
-		} isoc;
-
-		struct {
-			uint16_t interval; /* maximum (1024*8) in usb core */
-			uint16_t pad[3];
-		} intr;
-
-		struct {
-			uint16_t unlink_id; /* unlink request id */
-			uint16_t pad[3];
-		} unlink;
-
-	} u;
-
-	/* urb data segments */
-	struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
-};
-typedef struct usbif_urb_request usbif_urb_request_t;
-
-struct usbif_urb_response {
-	uint16_t id; /* request id */
-	uint16_t start_frame;  /* start frame (ISO) */
-	int32_t status; /* status (non-ISO) */
-	int32_t actual_length; /* actual transfer length */
-	int32_t error_count; /* number of ISO errors */
-};
-typedef struct usbif_urb_response usbif_urb_response_t;
-
-DEFINE_RING_TYPES(usbif_urb, struct usbif_urb_request, struct usbif_urb_response);
-#define USB_URB_RING_SIZE __CONST_RING_SIZE(usbif_urb, USBIF_RING_SIZE)
-
-/*
- * RING for notifying connect/disconnect events to frontend
- */
-struct usbif_conn_request {
-	uint16_t id;
-};
-typedef struct usbif_conn_request usbif_conn_request_t;
-
-struct usbif_conn_response {
-	uint16_t id; /* request id */
-	uint8_t portnum; /* port number */
-	uint8_t speed; /* usb_device_speed */
-#define USBIF_SPEED_NONE	0
-#define USBIF_SPEED_LOW		1
-#define USBIF_SPEED_FULL	2
-#define USBIF_SPEED_HIGH	3
-};
-typedef struct usbif_conn_response usbif_conn_response_t;
-
-DEFINE_RING_TYPES(usbif_conn, struct usbif_conn_request, struct usbif_conn_response);
-#define USB_CONN_RING_SIZE __CONST_RING_SIZE(usbif_conn, USBIF_RING_SIZE)
-
-#endif /* __XEN_PUBLIC_IO_USBIF_H__ */
diff --git a/include/hw/xen/interface/io/xenbus.h b/include/hw/xen/interface/io/xenbus.h
deleted file mode 100644
index 2fbf2a7fdc..0000000000
--- a/include/hw/xen/interface/io/xenbus.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*****************************************************************************
- * xenbus.h
- *
- * Xenbus protocol details.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- * Copyright (C) 2005 XenSource Ltd.
- */
-
-#ifndef _XEN_PUBLIC_IO_XENBUS_H
-#define _XEN_PUBLIC_IO_XENBUS_H
-
-/*
- * The state of either end of the Xenbus, i.e. the current communication
- * status of initialisation across the bus.  States here imply nothing about
- * the state of the connection between the driver and the kernel's device
- * layers.
- */
-enum xenbus_state {
-    XenbusStateUnknown       = 0,
-
-    XenbusStateInitialising  = 1,
-
-    /*
-     * InitWait: Finished early initialisation but waiting for information
-     * from the peer or hotplug scripts.
-     */
-    XenbusStateInitWait      = 2,
-
-    /*
-     * Initialised: Waiting for a connection from the peer.
-     */
-    XenbusStateInitialised   = 3,
-
-    XenbusStateConnected     = 4,
-
-    /*
-     * Closing: The device is being closed due to an error or an unplug event.
-     */
-    XenbusStateClosing       = 5,
-
-    XenbusStateClosed        = 6,
-
-    /*
-     * Reconfiguring: The device is being reconfigured.
-     */
-    XenbusStateReconfiguring = 7,
-
-    XenbusStateReconfigured  = 8
-};
-typedef enum xenbus_state XenbusState;
-
-#endif /* _XEN_PUBLIC_IO_XENBUS_H */
-- 
2.39.0
[RFC PATCH v7bis 15/19] i386/xen: Initialize XenBus and legacy backends from pc_init1()
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

Now that we're close to being able to use the PV backends without actual
Xen, move the bus instantiation out from xen_hvm_init_pc() to pc_init1().

However, still only do it for (xen_mode == XEN_ATTACH) (i.e. when running
on true Xen) because we don't have XenStore ops for emulation yet, and
the XenBus instantiation failure is fatal. Once we have a functional
XenStore for emulated mode, this will become (xen_mode != XEN_DISABLED).

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/pc_piix.c     | 17 +++++++++++++++++
 hw/i386/xen/xen-hvm.c | 11 -----------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c
index df64dd8dcc..d6e61b47b4 100644
--- a/hw/i386/pc_piix.c
+++ b/hw/i386/pc_piix.c
@@ -59,6 +59,10 @@
 #include <xen/hvm/hvm_info_table.h>
 #include "hw/xen/xen_pt.h"
 #endif
+#ifdef CONFIG_XENFV_MACHINE
+#include "hw/xen/xen-legacy-backend.h"
+#include "hw/xen/xen-bus.h"
+#endif
 #include "migration/global_state.h"
 #include "migration/misc.h"
 #include "sysemu/numa.h"
@@ -170,6 +174,19 @@ static void pc_init1(MachineState *machine,
         }
     }
 
+#ifdef CONFIG_XENFV_MACHINE
+    if (xen_mode == XEN_ATTACH) {
+        /* Initialize backend core & drivers */
+        xen_bus_init();
+
+        if (xen_be_init() != 0) {
+            error_report("xen backend core setup failed");
+            exit(1);
+        }
+        xen_be_register_common();
+    }
+#endif
+
     pc_machine_init_sgx_epc(pcms);
     x86_cpus_init(x86ms, pcmc->default_cpu_version);
 
diff --git a/hw/i386/xen/xen-hvm.c b/hw/i386/xen/xen-hvm.c
index 779d923e10..48f289f8ee 100644
--- a/hw/i386/xen/xen-hvm.c
+++ b/hw/i386/xen/xen-hvm.c
@@ -20,8 +20,6 @@
 #include "hw/i386/apic-msidef.h"
 #include "hw/xen/xen_native.h"
 #include "hw/xen/xen_backend_ops.h"
-#include "hw/xen/xen-legacy-backend.h"
-#include "hw/xen/xen-bus.h"
 #include "hw/xen/xen-x86.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-migration.h"
@@ -1505,15 +1503,6 @@ void xen_hvm_init_pc(PCMachineState *pcms, MemoryRegion **ram_memory)
     QLIST_INIT(&state->dev_list);
     device_listener_register(&state->device_listener);
 
-    xen_bus_init();
-
-    /* Initialize backend core & drivers */
-    if (xen_be_init() != 0) {
-        error_report("xen backend core setup failed");
-        goto err;
-    }
-    xen_be_register_common();
-
     QLIST_INIT(&xen_physmap);
     xen_read_physmap(state);
 
-- 
2.39.0
[RFC PATCH v7bis 16/19] i386/xen: Implement HYPERVISOR_physdev_op
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

Just hook up the basic hypercalls to stubs in xen_evtchn.c for now.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/xen_evtchn.c     |  25 ++++++++
 hw/i386/kvm/xen_evtchn.h     |  11 ++++
 target/i386/kvm/xen-compat.h |  19 ++++++
 target/i386/kvm/xen-emu.c    | 118 +++++++++++++++++++++++++++++++++++
 4 files changed, 173 insertions(+)

diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index b571517b2c..c381a9f405 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -1355,6 +1355,31 @@ int xen_evtchn_set_port(uint16_t port)
     return ret;
 }
 
+int xen_physdev_map_pirq(struct physdev_map_pirq *map)
+{
+    return -ENOTSUP;
+}
+
+int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
+{
+    return -ENOTSUP;
+}
+
+int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
+{
+    return -ENOTSUP;
+}
+
+int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
+{
+    return -ENOTSUP;
+}
+
+int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
+{
+    return -ENOTSUP;
+}
+
 struct xenevtchn_handle *xen_be_evtchn_open(void)
 {
     struct xenevtchn_handle *xc = g_new0(struct xenevtchn_handle, 1);
diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
index b7b6f4e592..ccf58aa796 100644
--- a/hw/i386/kvm/xen_evtchn.h
+++ b/hw/i386/kvm/xen_evtchn.h
@@ -65,4 +65,15 @@ int xen_evtchn_bind_interdomain_op(struct evtchn_bind_interdomain *interdomain);
 int xen_evtchn_bind_vcpu_op(struct evtchn_bind_vcpu *vcpu);
 int xen_evtchn_reset_op(struct evtchn_reset *reset);
 
+struct physdev_map_pirq;
+struct physdev_unmap_pirq;
+struct physdev_eoi;
+struct physdev_irq_status_query;
+struct physdev_get_free_pirq;
+int xen_physdev_map_pirq(struct physdev_map_pirq *map);
+int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap);
+int xen_physdev_eoi_pirq(struct physdev_eoi *eoi);
+int xen_physdev_query_pirq(struct physdev_irq_status_query *query);
+int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get);
+
 #endif /* QEMU_XEN_EVTCHN_H */
diff --git a/target/i386/kvm/xen-compat.h b/target/i386/kvm/xen-compat.h
index ff5d20e901..e86ffc7644 100644
--- a/target/i386/kvm/xen-compat.h
+++ b/target/i386/kvm/xen-compat.h
@@ -48,4 +48,23 @@ struct compat_xen_add_to_physmap_batch {
     COMPAT_HANDLE(int) errs;
 };
 
+struct compat_physdev_map_pirq {
+    domid_t domid;
+    uint16_t pad;
+    /* IN */
+    int type;
+    /* IN (ignored for ..._MULTI_MSI) */
+    int index;
+    /* IN or OUT */
+    int pirq;
+    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
+    int bus;
+    /* IN */
+    int devfn;
+    /* IN (also OUT for ..._MULTI_MSI) */
+    int entry_nr;
+    /* IN */
+    uint64_t table_base;
+} __attribute__((packed));
+
 #endif /* QEMU_I386_XEN_COMPAT_H */
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index e2e5e96b67..623331c40e 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -1493,6 +1493,121 @@ static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
     return true;
 }
 
+static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
+                                     int cmd, uint64_t arg)
+{
+    CPUState *cs = CPU(cpu);
+    int err;
+
+    switch (cmd) {
+    case PHYSDEVOP_map_pirq: {
+        struct physdev_map_pirq map;
+
+        if (hypercall_compat32(exit->u.hcall.longmode)) {
+            struct compat_physdev_map_pirq *map32 = (void *)&map;
+
+            if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
+                return -EFAULT;
+            }
+
+            /*
+             * The only thing that's different is the alignment of the
+             * uint64_t table_base at the end, which gets padding to make
+             * it 64-bit aligned in the 64-bit version.
+             */
+            qemu_build_assert(sizeof(*map32) == 36);
+            qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
+                              offsetof(struct compat_physdev_map_pirq, entry_nr));
+            memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
+        } else {
+            if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
+                err = -EFAULT;
+                break;
+            }
+        }
+        err = xen_physdev_map_pirq(&map);
+        /*
+         * Since table_base is an IN parameter and won't be changed, just
+         * copy the size of the compat structure back to the guest.
+         */
+        if (!err && kvm_copy_to_gva(cs, arg, &map,
+                                    sizeof(struct compat_physdev_map_pirq))) {
+            err = -EFAULT;
+        }
+        break;
+    }
+    case PHYSDEVOP_unmap_pirq: {
+        struct physdev_unmap_pirq unmap;
+
+        qemu_build_assert(sizeof(unmap) == 8);
+        if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
+            err = -EFAULT;
+            break;
+        }
+
+        err = xen_physdev_unmap_pirq(&unmap);
+        if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
+            err = -EFAULT;
+        }
+        break;
+    }
+    case PHYSDEVOP_eoi: {
+        struct physdev_eoi eoi;
+
+        qemu_build_assert(sizeof(eoi) == 4);
+        if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
+            err = -EFAULT;
+            break;
+        }
+
+        err = xen_physdev_eoi_pirq(&eoi);
+        if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
+            err = -EFAULT;
+        }
+        break;
+    }
+    case PHYSDEVOP_irq_status_query: {
+        struct physdev_irq_status_query query;
+
+        qemu_build_assert(sizeof(query) == 8);
+        if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
+            err = -EFAULT;
+            break;
+        }
+
+        err = xen_physdev_query_pirq(&query);
+        if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
+            err = -EFAULT;
+        }
+        break;
+    }
+    case PHYSDEVOP_get_free_pirq: {
+        struct physdev_get_free_pirq get;
+
+        qemu_build_assert(sizeof(get) == 8);
+        if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
+            err = -EFAULT;
+            break;
+        }
+
+        err = xen_physdev_get_free_pirq(&get);
+        if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
+            err = -EFAULT;
+        }
+        break;
+    }
+    case PHYSDEVOP_pirq_eoi_gmfn_v2: // FreeBSD 13 makes this hypercall
+        err = -ENOSYS;
+        break;
+
+    default:
+        return false;
+    }
+
+    exit->u.hcall.result = err;
+    return true;
+}
+
 static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
 {
     uint16_t code = exit->u.hcall.input;
@@ -1527,6 +1642,9 @@ static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
     case __HYPERVISOR_memory_op:
         return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
                                        exit->u.hcall.params[1]);
+    case __HYPERVISOR_physdev_op:
+        return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
+                                        exit->u.hcall.params[1]);
     case __HYPERVISOR_xen_version:
         return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
                                          exit->u.hcall.params[1]);
-- 
2.39.0
[RFC PATCH v7bis 17/19] hw/xen: Implement emulated PIRQ hypercall support
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

This wires up the basic infrastructure but the actual interrupts aren't
there yet, so don't advertise it to the guest.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/trace-events  |   4 +
 hw/i386/kvm/trace.h       |   1 +
 hw/i386/kvm/xen_evtchn.c  | 252 +++++++++++++++++++++++++++++++++++++-
 hw/i386/kvm/xen_evtchn.h  |   2 +
 meson.build               |   1 +
 target/i386/kvm/xen-emu.c |  15 +++
 6 files changed, 270 insertions(+), 5 deletions(-)
 create mode 100644 hw/i386/kvm/trace-events
 create mode 100644 hw/i386/kvm/trace.h

diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events
new file mode 100644
index 0000000000..04e60c5bb8
--- /dev/null
+++ b/hw/i386/kvm/trace-events
@@ -0,0 +1,4 @@
+kvm_xen_map_pirq(int pirq, int gsi) "pirq %d gsi %d"
+kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d"
+kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d"
+kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d"
diff --git a/hw/i386/kvm/trace.h b/hw/i386/kvm/trace.h
new file mode 100644
index 0000000000..e55d0812fd
--- /dev/null
+++ b/hw/i386/kvm/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-hw_i386_kvm.h"
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index c381a9f405..5672b083c3 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -23,6 +23,7 @@
 #include "exec/target_page.h"
 #include "exec/address-spaces.h"
 #include "migration/vmstate.h"
+#include "trace.h"
 
 #include "hw/sysbus.h"
 #include "hw/xen/xen.h"
@@ -105,6 +106,23 @@ struct xenevtchn_handle {
 #define PORT_INFO_TYPEVAL_REMOTE_QEMU           0x8000
 #define PORT_INFO_TYPEVAL_REMOTE_PORT_MASK      0x7FFF
 
+#define MAX_XEN_PIRQ 0x1048 /* Empirically */
+
+/*
+ * These 'emuirq' values are used by Xen in the LM stream... and yes, I am
+ * insane enough to think about guest-transparent live migration from actual
+ * Xen to QEMU, and ensuring that we can convert/consume the stream.
+ */
+#define IRQ_UNBOUND -1
+#define IRQ_PT -2
+#define IRQ_MSI_EMU -3
+
+
+struct pirq_info {
+    int gsi;
+    uint16_t port;
+};
+
 struct XenEvtchnState {
     /*< private >*/
     SysBusDevice busdev;
@@ -122,6 +140,14 @@ struct XenEvtchnState {
     qemu_irq gsis[GSI_NUM_PINS];
 
     struct xenevtchn_handle *be_handles[EVTCHN_2L_NR_CHANNELS];
+
+    /* GSI → PIRQ mapping (serialized) */
+    uint16_t gsi_pirq[GSI_NUM_PINS];
+    /* Bitmap of allocated PIRQs (serialized) */
+    uint64_t pirq_inuse[DIV_ROUND_UP(MAX_XEN_PIRQ, 64)];
+
+    /* Per-PIRQ information (rebuilt on migration) */
+    struct pirq_info pirq[MAX_XEN_PIRQ];
 };
 
 struct XenEvtchnState *xen_evtchn_singleton;
@@ -181,6 +207,9 @@ static const VMStateDescription xen_evtchn_vmstate = {
         VMSTATE_UINT32(nr_ports, XenEvtchnState),
         VMSTATE_STRUCT_VARRAY_UINT32(port_table, XenEvtchnState, nr_ports, 1,
                                      xen_evtchn_port_vmstate, XenEvtchnPort),
+        VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, GSI_NUM_PINS),
+        VMSTATE_UINT64_ARRAY(pirq_inuse, XenEvtchnState,
+                             DIV_ROUND_UP(MAX_XEN_PIRQ, 64)),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -935,6 +964,10 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port)
     case EVTCHNSTAT_closed:
         return -ENOENT;
 
+    case EVTCHNSTAT_pirq:
+        s->pirq[p->type_val].port = 0;
+        break;
+
     case EVTCHNSTAT_virq:
         kvm_xen_set_vcpu_virq(virq_is_global(p->type_val) ? 0 : p->vcpu,
                               p->type_val, 0);
@@ -1129,6 +1162,37 @@ int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq)
     return ret;
 }
 
+int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    int ret;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    if (pirq->pirq >= MAX_XEN_PIRQ) {
+        return -EINVAL;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    if (s->pirq[pirq->pirq].port) {
+        return -EBUSY;
+    }
+
+    ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
+                        &pirq->port);
+    if (ret) {
+        return ret;
+    }
+
+    s->pirq[pirq->pirq].port = pirq->port;
+    trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
+
+    return ret;
+}
+
 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
@@ -1355,29 +1419,207 @@ int xen_evtchn_set_port(uint16_t port)
     return ret;
 }
 
+#define pirq_inuse_word(s, pirq) (s->pirq_inuse[((pirq) / 64)])
+#define pirq_inuse_bit(pirq) (1ULL << ((pirq) & 63))
+
+#define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
+
+static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
+{
+    uint16_t pirq;
+
+    /* Preserve the allocation strategy that Xen has. It looks like
+     * we *never* give out PIRQ 0-15, we give out 16-nr_irqs_gsi only
+     * to GSIs (counting up from 16), and then we count backwards from
+     * the top for MSIs or when the GSI space is exhausted. */
+    if (type == MAP_PIRQ_TYPE_GSI) {
+        for (pirq = 16 ; pirq < GSI_NUM_PINS; pirq++) {
+            if (pirq_inuse(s, pirq)) {
+                continue;
+            }
+
+            /* Found it */
+            goto found;
+        }
+    }
+    for (pirq = MAX_XEN_PIRQ - 1; pirq >= GSI_NUM_PINS; pirq--) {
+        /* Skip whole words at a time when they're full */
+        if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
+            pirq &= ~63ULL;
+            continue;
+        }
+        if (pirq_inuse(s, pirq)) {
+            continue;
+        }
+
+        goto found;
+    }
+    return -ENOSPC;
+
+ found:
+    pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
+    if (gsi >= 0) {
+        assert(gsi <= GSI_NUM_PINS);
+        s->gsi_pirq[gsi] = pirq;
+    }
+    s->pirq[pirq].gsi = gsi;
+    return pirq;
+}
+
 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
 {
-    return -ENOTSUP;
+    XenEvtchnState *s = xen_evtchn_singleton;
+    int pirq = map->pirq;
+    int gsi = map->index;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    if (map->domid != DOMID_SELF && map->domid != xen_domid) {
+        return -EPERM;
+    }
+    if (map->type != MAP_PIRQ_TYPE_GSI) {
+        return -EINVAL;
+    }
+    if (gsi < 0 || gsi >= GSI_NUM_PINS) {
+        return -EINVAL;
+    }
+
+    if (pirq < 0) {
+        pirq = allocate_pirq(s, map->type, gsi);
+        if (pirq < 0) {
+            return pirq;
+        }
+        map->pirq = pirq;
+    } else if (pirq > MAX_XEN_PIRQ) {
+        return -EINVAL;
+    } else {
+        /* User specified a valid-looking PIRQ#. Allow it if it is
+         * allocated and not yet bound, or if it is unallocated */
+        if (pirq_inuse(s, pirq)) {
+            if (s->pirq[pirq].gsi != IRQ_UNBOUND) {
+                return -EBUSY;
+            }
+        } else {
+            /* If it was unused, mark it used now. */
+            pirq_inuse_word(s, pirq) |= pirq_inuse_bit(pirq);
+        }
+        /* Set the mapping in both directions. */
+        s->pirq[pirq].gsi = gsi;
+        s->gsi_pirq[gsi] = pirq;
+    }
+
+    trace_kvm_xen_map_pirq(pirq, gsi);
+    return 0;
 }
 
 int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
 {
-    return -ENOTSUP;
+    XenEvtchnState *s = xen_evtchn_singleton;
+    int pirq = unmap->pirq;
+    int gsi;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    if (unmap->domid != DOMID_SELF && unmap->domid != xen_domid)
+        return -EPERM;
+    if (pirq < 0 || pirq >= MAX_XEN_PIRQ)
+        return -EINVAL;
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    if (!pirq_inuse(s, pirq)) {
+        return -ENOENT;
+    }
+
+    gsi = s->pirq[pirq].gsi;
+
+    /* We can only unmap GSI PIRQs */
+    if (gsi < 0) {
+        return -EINVAL;
+    }
+
+    s->gsi_pirq[gsi] = 0;
+    s->pirq[pirq].gsi = IRQ_UNBOUND; /* Doesn't actually matter because: */
+    pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
+
+    trace_kvm_xen_unmap_pirq(pirq, gsi);
+    return 0;
 }
 
 int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
 {
-    return -ENOTSUP;
+    XenEvtchnState *s = xen_evtchn_singleton;
+    int pirq = eoi->irq;
+    int gsi;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    if (!pirq_inuse(s, pirq)) {
+        return -ENOENT;
+    }
+
+    gsi = s->pirq[pirq].gsi;
+    if (gsi < 0) {
+        return -EINVAL;
+    }
+
+    // XX: Reassert a level IRQ if needed */
+    return 0;
 }
 
 int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
 {
-    return -ENOTSUP;
+    XenEvtchnState *s = xen_evtchn_singleton;
+    int pirq = query->irq;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    if (!pirq_inuse(s, pirq)) {
+        return -ENOENT;
+    }
+
+    if (s->pirq[pirq].gsi >= 0) {
+        query->flags = XENIRQSTAT_needs_eoi;
+    } else {
+        query->flags = 0;
+    }
+
+    return 0;
 }
 
 int xen_physdev_get_free_pirq(struct physdev_get_free_pirq *get)
 {
-    return -ENOTSUP;
+    XenEvtchnState *s = xen_evtchn_singleton;
+    int pirq;
+
+    if (!s) {
+        return -ENOTSUP;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    pirq = allocate_pirq(s, get->type, IRQ_UNBOUND);
+    if (pirq < 0) {
+        return pirq;
+    }
+
+    get->pirq = pirq;
+    trace_kvm_xen_get_free_pirq(pirq, get->type);
+    return 0;
 }
 
 struct xenevtchn_handle *xen_be_evtchn_open(void)
diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
index ccf58aa796..2c12506cc2 100644
--- a/hw/i386/kvm/xen_evtchn.h
+++ b/hw/i386/kvm/xen_evtchn.h
@@ -48,6 +48,7 @@ struct evtchn_status;
 struct evtchn_close;
 struct evtchn_unmask;
 struct evtchn_bind_virq;
+struct evtchn_bind_pirq;
 struct evtchn_bind_ipi;
 struct evtchn_send;
 struct evtchn_alloc_unbound;
@@ -58,6 +59,7 @@ int xen_evtchn_status_op(struct evtchn_status *status);
 int xen_evtchn_close_op(struct evtchn_close *close);
 int xen_evtchn_unmask_op(struct evtchn_unmask *unmask);
 int xen_evtchn_bind_virq_op(struct evtchn_bind_virq *virq);
+int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq);
 int xen_evtchn_bind_ipi_op(struct evtchn_bind_ipi *ipi);
 int xen_evtchn_send_op(struct evtchn_send *send);
 int xen_evtchn_alloc_unbound_op(struct evtchn_alloc_unbound *alloc);
diff --git a/meson.build b/meson.build
index 6171183a30..743da8861c 100644
--- a/meson.build
+++ b/meson.build
@@ -2938,6 +2938,7 @@ if have_system
     'hw/i2c',
     'hw/i386',
     'hw/i386/xen',
+    'hw/i386/kvm',
     'hw/ide',
     'hw/input',
     'hw/intc',
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index 623331c40e..03b999d0b5 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -1227,6 +1227,21 @@ static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
         }
         break;
     }
+    case EVTCHNOP_bind_pirq: {
+        struct evtchn_bind_pirq pirq;
+
+        qemu_build_assert(sizeof(pirq) == 12);
+        if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
+            err = -EFAULT;
+            break;
+        }
+
+        err = xen_evtchn_bind_pirq_op(&pirq);
+        if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
+            err = -EFAULT;
+        }
+        break;
+    }
     case EVTCHNOP_bind_ipi: {
         struct evtchn_bind_ipi ipi;
 
-- 
2.39.0


[RFC PATCH v7bis 18/19] hw/xen: Support GSI mapping to PIRQ
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

If I advertise XENFEAT_hvm_pirqs then a guest now boots successfully as
long as I tell it 'pci=nomsi'.

[root@localhost ~]# cat /proc/interrupts
           CPU0
  0:         52   IO-APIC   2-edge      timer
  1:         16  xen-pirq   1-ioapic-edge  i8042
  4:       1534  xen-pirq   4-ioapic-edge  ttyS0
  8:          1  xen-pirq   8-ioapic-edge  rtc0
  9:          0  xen-pirq   9-ioapic-level  acpi
 11:       5648  xen-pirq  11-ioapic-level  ahci[0000:00:04.0]
 12:        257  xen-pirq  12-ioapic-edge  i8042
...

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/xen_evtchn.c | 54 +++++++++++++++++++++++++++++++++++++++-
 hw/i386/kvm/xen_evtchn.h |  2 ++
 hw/i386/x86.c            | 16 ++++++++++++
 3 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 5672b083c3..27a455d10c 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -145,6 +145,7 @@ struct XenEvtchnState {
     uint16_t gsi_pirq[GSI_NUM_PINS];
     /* Bitmap of allocated PIRQs (serialized) */
     uint64_t pirq_inuse[DIV_ROUND_UP(MAX_XEN_PIRQ, 64)];
+    uint32_t pirq_gsi_set;
 
     /* Per-PIRQ information (rebuilt on migration) */
     struct pirq_info pirq[MAX_XEN_PIRQ];
@@ -210,6 +211,7 @@ static const VMStateDescription xen_evtchn_vmstate = {
         VMSTATE_UINT16_ARRAY(gsi_pirq, XenEvtchnState, GSI_NUM_PINS),
         VMSTATE_UINT64_ARRAY(pirq_inuse, XenEvtchnState,
                              DIV_ROUND_UP(MAX_XEN_PIRQ, 64)),
+        VMSTATE_UINT32(pirq_gsi_set, XenEvtchnState),
         VMSTATE_END_OF_LIST()
     }
 };
@@ -1466,6 +1468,51 @@ static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
     return pirq;
 }
 
+bool xen_evtchn_set_gsi(int gsi, int level)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    int pirq;
+
+    assert(qemu_mutex_iothread_locked());
+
+    if (!s || gsi < 0 || gsi > GSI_NUM_PINS) {
+        return false;
+    }
+
+    /*
+     * Check that that it *isn't* the event channel GSI, and thus
+     * that we are not recursing and it's safe to take s->port_lock.
+     *
+     * Locking aside, it's perfectly sane to bail out early for that
+     * special case, as it would make no sense for the event channel
+     * GSI to be routed back to event channels, when the delivery
+     * method is to raise the GSI... that recursion wouldn't *just*
+     * be a locking issue.
+     */
+    if (gsi && gsi == s->callback_gsi) {
+        return false;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    pirq = s->gsi_pirq[gsi];
+    if (!pirq) {
+        return false;
+    }
+
+    if (level) {
+        int port = s->pirq[pirq].port;
+
+        s->pirq_gsi_set |= (1U << gsi);
+        if (port) {
+            set_port_pending(s, port);
+        }
+    } else {
+        s->pirq_gsi_set &= ~(1U << gsi);
+    }
+    return true;
+}
+
 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
@@ -1572,8 +1619,13 @@ int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
     if (gsi < 0) {
         return -EINVAL;
     }
+    if (s->pirq_gsi_set & (1U << gsi)) {
+        int port = s->pirq[pirq].port;
+        if (port) {
+            set_port_pending(s, port);
+        }
+    }
 
-    // XX: Reassert a level IRQ if needed */
     return 0;
 }
 
diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
index 2c12506cc2..dba9d6b021 100644
--- a/hw/i386/kvm/xen_evtchn.h
+++ b/hw/i386/kvm/xen_evtchn.h
@@ -24,6 +24,8 @@ void xen_evtchn_set_callback_level(int level);
 
 int xen_evtchn_set_port(uint16_t port);
 
+bool xen_evtchn_set_gsi(int gsi, int level);
+
 /*
  * These functions mirror the libxenevtchn library API, providing the QEMU
  * backend side of "interdomain" event channels.
diff --git a/hw/i386/x86.c b/hw/i386/x86.c
index 78cc131926..242ddba852 100644
--- a/hw/i386/x86.c
+++ b/hw/i386/x86.c
@@ -61,6 +61,11 @@
 #include CONFIG_DEVICES
 #include "kvm/kvm_i386.h"
 
+#ifdef CONFIG_XEN_EMU
+#include "hw/xen/xen.h"
+#include "hw/i386/kvm/xen_evtchn.h"
+#endif
+
 /* Physical Address of PVH entry point read from kernel ELF NOTE */
 static size_t pvh_start_addr;
 
@@ -608,6 +613,17 @@ void gsi_handler(void *opaque, int n, int level)
         }
         /* fall through */
     case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1:
+#ifdef CONFIG_XEN_EMU
+        /*
+         * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
+         * routing actually works properly under Xen). And then to
+         * *either* the PIRQ handling or the I/OAPIC depending on
+         * whether the former wants it.
+         */
+        if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) {
+            break;
+        }
+#endif
         qemu_set_irq(s->ioapic_irq[n], level);
         break;
     case IO_APIC_SECONDARY_IRQBASE
-- 
2.39.0
[RFC PATCH v7bis 19/19] hw/xen: Support MSI mapping to PIRQ
Posted by David Woodhouse 1 year, 3 months ago
From: David Woodhouse <dwmw@amazon.co.uk>

The way that Xen handles MSI PIRQs is kind of awful.

There is a special MSI message which targets a PIRQ. The vector in the
low bits of data must be zero. The low 8 bits of the PIRQ# are in the
destination ID field, the extended destination ID field is unused, and
instead the high bits of the PIRQ# are in the high 32 bits of the address.

Using the high bits of the address means that we can't intercept and
translate these messages in kvm_send_msi(), because they won't be caught
by the APIC — addresses like 0x1000fee46000 aren't in the APIC's range.

So we catch them in pci_msi_trigger() instead, and deliver the event
channel directly.

That isn't even the worst part. The worst part is that Xen snoops on
writes to devices' MSI vectors while they are *masked*. When a MSI
message is written which looks like it targets a PIRQ, it remembers
the device and vector for later.

When the guest makes a hypercall to bind that PIRQ# (snooped from a
marked MSI vector) to an event channel port, Xen *unmasks* that MSI
vector on the device. Xen guests using PIRQ delivery of MSI don't
ever actually unmask the MSI for themselves.

Now that this is working we can finally enable XENFEAT_hvm_pirqs and
let the guest use it all.

Tested with passthrough igb and emulated e1000e + AHCI.

           CPU0       CPU1
  0:         65          0   IO-APIC   2-edge      timer
  1:          0         14  xen-pirq   1-ioapic-edge  i8042
  4:          0        846  xen-pirq   4-ioapic-edge  ttyS0
  8:          1          0  xen-pirq   8-ioapic-edge  rtc0
  9:          0          0  xen-pirq   9-ioapic-level  acpi
 12:        257          0  xen-pirq  12-ioapic-edge  i8042
 24:       9600          0  xen-percpu    -virq      timer0
 25:       2758          0  xen-percpu    -ipi       resched0
 26:          0          0  xen-percpu    -ipi       callfunc0
 27:          0          0  xen-percpu    -virq      debug0
 28:       1526          0  xen-percpu    -ipi       callfuncsingle0
 29:          0          0  xen-percpu    -ipi       spinlock0
 30:          0       8608  xen-percpu    -virq      timer1
 31:          0        874  xen-percpu    -ipi       resched1
 32:          0          0  xen-percpu    -ipi       callfunc1
 33:          0          0  xen-percpu    -virq      debug1
 34:          0       1617  xen-percpu    -ipi       callfuncsingle1
 35:          0          0  xen-percpu    -ipi       spinlock1
 36:          8          0   xen-dyn    -event     xenbus
 37:          0       6046  xen-pirq    -msi       ahci[0000:00:03.0]
 38:          1          0  xen-pirq    -msi-x     ens4
 39:          0         73  xen-pirq    -msi-x     ens4-rx-0
 40:         14          0  xen-pirq    -msi-x     ens4-rx-1
 41:          0         32  xen-pirq    -msi-x     ens4-tx-0
 42:         47          0  xen-pirq    -msi-x     ens4-tx-1

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
---
 hw/i386/kvm/trace-events   |   1 +
 hw/i386/kvm/xen-stubs.c    |  15 +++
 hw/i386/kvm/xen_evtchn.c   | 268 ++++++++++++++++++++++++++++++++++++-
 hw/i386/kvm/xen_evtchn.h   |   8 ++
 hw/pci/msi.c               |  11 ++
 hw/pci/msix.c              |   7 +
 hw/pci/pci.c               |  17 +++
 include/hw/pci/msi.h       |   1 +
 target/i386/kvm/kvm.c      |  12 +-
 target/i386/kvm/kvm_i386.h |   2 +
 target/i386/kvm/xen-emu.c  |   3 +-
 11 files changed, 335 insertions(+), 10 deletions(-)

diff --git a/hw/i386/kvm/trace-events b/hw/i386/kvm/trace-events
index 04e60c5bb8..b83c3eb965 100644
--- a/hw/i386/kvm/trace-events
+++ b/hw/i386/kvm/trace-events
@@ -2,3 +2,4 @@ kvm_xen_map_pirq(int pirq, int gsi) "pirq %d gsi %d"
 kvm_xen_unmap_pirq(int pirq, int gsi) "pirq %d gsi %d"
 kvm_xen_get_free_pirq(int pirq, int type) "pirq %d type %d"
 kvm_xen_bind_pirq(int pirq, int port) "pirq %d port %d"
+kvm_xen_unmask_pirq(int pirq, char *dev, int vector) "pirq %d dev %s vector %d"
diff --git a/hw/i386/kvm/xen-stubs.c b/hw/i386/kvm/xen-stubs.c
index 6f433dc995..5660a2ccad 100644
--- a/hw/i386/kvm/xen-stubs.c
+++ b/hw/i386/kvm/xen-stubs.c
@@ -12,6 +12,7 @@
 #include "qemu/osdep.h"
 #include "qapi/error.h"
 #include "qapi/qapi-commands-misc.h"
+#include "xen_evtchn.h"
 
 EvtchnInfoList *qmp_xen_event_list(Error **errp)
 {
@@ -23,3 +24,17 @@ void qmp_xen_event_inject(uint32_t port, Error **errp)
 {
     error_setg(errp, "Xen event channel emulation not enabled");
 }
+
+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
+                          uint64_t addr, uint32_t data, bool is_masked)
+{
+}
+
+void xen_evtchn_remove_pci_device(PCIDevice *dev)
+{
+}
+
+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
+{
+    return false;
+}
diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 27a455d10c..3df76958bf 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -30,9 +30,10 @@
 #include "hw/i386/x86.h"
 #include "hw/i386/pc.h"
 #include "hw/pci/pci.h"
+#include "hw/pci/msi.h"
+#include "hw/pci/msix.h"
 #include "hw/irq.h"
 #include "hw/xen/xen_backend_ops.h"
-
 #include "xen_evtchn.h"
 #include "xen_overlay.h"
 #include "xen_xenstore.h"
@@ -45,6 +46,9 @@
 #include "standard-headers/xen/memory.h"
 #include "standard-headers/xen/hvm/params.h"
 
+/* XX: For kvm_update_msi_routes_all() */
+#include "target/i386/kvm/kvm_i386.h"
+
 #define TYPE_XEN_EVTCHN "xen-evtchn"
 OBJECT_DECLARE_SIMPLE_TYPE(XenEvtchnState, XEN_EVTCHN)
 
@@ -121,6 +125,11 @@ struct xenevtchn_handle {
 struct pirq_info {
     int gsi;
     uint16_t port;
+    PCIDevice *dev;
+    int vector;
+    bool is_msix;
+    bool is_masked;
+    bool is_translated;
 };
 
 struct XenEvtchnState {
@@ -147,7 +156,7 @@ struct XenEvtchnState {
     uint64_t pirq_inuse[DIV_ROUND_UP(MAX_XEN_PIRQ, 64)];
     uint32_t pirq_gsi_set;
 
-    /* Per-PIRQ information (rebuilt on migration) */
+    /* Per-PIRQ information (rebuilt on migration, protected by BQL) */
     struct pirq_info pirq[MAX_XEN_PIRQ];
 };
 
@@ -958,16 +967,22 @@ static bool virq_is_global(uint32_t virq)
     }
 }
 
-static int close_port(XenEvtchnState *s, evtchn_port_t port)
+static int close_port(XenEvtchnState *s, evtchn_port_t port, bool *flush_kvm_routes)
 {
     XenEvtchnPort *p = &s->port_table[port];
 
+    /* Because it *might* be a PIRQ port */
+    assert(qemu_mutex_iothread_locked());
+
     switch (p->type) {
     case EVTCHNSTAT_closed:
         return -ENOENT;
 
     case EVTCHNSTAT_pirq:
         s->pirq[p->type_val].port = 0;
+        if (s->pirq[p->type_val].is_translated) {
+            *flush_kvm_routes = true;
+        }
         break;
 
     case EVTCHNSTAT_virq:
@@ -1016,20 +1031,27 @@ static int close_port(XenEvtchnState *s, evtchn_port_t port)
 int xen_evtchn_soft_reset(void)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
+    bool flush_kvm_routes;
     int i;
 
     if (!s) {
         return -ENOTSUP;
     }
 
+    qemu_mutex_lock_iothread();
     qemu_mutex_lock(&s->port_lock);
 
     for (i = 0; i < s->nr_ports; i++) {
-        close_port(s, i);
+        close_port(s, i, &flush_kvm_routes);
     }
 
     qemu_mutex_unlock(&s->port_lock);
 
+    if (flush_kvm_routes) {
+        kvm_update_msi_routes_all(NULL, true, 0, 0);
+    }
+
+    qemu_mutex_unlock_iothread();
     return 0;
 }
 
@@ -1045,6 +1067,7 @@ int xen_evtchn_reset_op(struct evtchn_reset *reset)
 int xen_evtchn_close_op(struct evtchn_close *close)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
+    bool flush_kvm_routes = false;
     int ret;
 
     if (!s) {
@@ -1055,11 +1078,19 @@ int xen_evtchn_close_op(struct evtchn_close *close)
         return -EINVAL;
     }
 
+    qemu_mutex_lock_iothread();
     qemu_mutex_lock(&s->port_lock);
 
-    ret = close_port(s, close->port);
+    ret = close_port(s, close->port, &flush_kvm_routes);
 
     qemu_mutex_unlock(&s->port_lock);
+    qemu_mutex_unlock_iothread();
+
+    if (flush_kvm_routes) {
+        qemu_mutex_lock_iothread();
+        kvm_update_msi_routes_all(NULL, true, 0, 0);
+        qemu_mutex_unlock_iothread();
+    }
 
     return ret;
 }
@@ -1177,21 +1208,54 @@ int xen_evtchn_bind_pirq_op(struct evtchn_bind_pirq *pirq)
         return -EINVAL;
     }
 
-    QEMU_LOCK_GUARD(&s->port_lock);
+    QEMU_IOTHREAD_LOCK_GUARD();
 
     if (s->pirq[pirq->pirq].port) {
         return -EBUSY;
     }
 
+    qemu_mutex_lock(&s->port_lock);
+
     ret = allocate_port(s, 0, EVTCHNSTAT_pirq, pirq->pirq,
                         &pirq->port);
     if (ret) {
+        qemu_mutex_unlock(&s->port_lock);
         return ret;
     }
 
     s->pirq[pirq->pirq].port = pirq->port;
     trace_kvm_xen_bind_pirq(pirq->pirq, pirq->port);
 
+    qemu_mutex_unlock(&s->port_lock);
+
+    /*
+     * Need to do the unmask outside port_lock because it may call
+     * back into the MSI translate function.
+     */
+    if (s->pirq[pirq->pirq].gsi == IRQ_MSI_EMU) {
+        if (s->pirq[pirq->pirq].is_masked) {
+            PCIDevice *dev = s->pirq[pirq->pirq].dev;
+            int vector = s->pirq[pirq->pirq].vector;
+            char *dev_path = qdev_get_dev_path(DEVICE(dev));
+
+            trace_kvm_xen_unmask_pirq(pirq->pirq, dev_path, vector);
+            g_free(dev_path);
+
+            if (s->pirq[pirq->pirq].is_msix) {
+                msix_set_mask(dev, vector, false);
+            } else {
+                msi_set_mask(dev, vector, false, NULL);
+            }
+        } else if (s->pirq[pirq->pirq].is_translated) {
+            /*
+             * If KVM had attempted to translate this one before, make it try
+             * again. If we unmasked, then the notifier on the MSI(-X) vector
+             * will already have had the same effect.
+             */
+            kvm_update_msi_routes_all(NULL, true, 0, 0);
+        }
+    }
+
     return ret;
 }
 
@@ -1444,6 +1508,16 @@ static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
             goto found;
         }
     }
+    for (pirq = 16 + GSI_NUM_PINS ; pirq < MAX_XEN_PIRQ; pirq++) {
+        if (pirq_inuse(s, pirq)) {
+            continue;
+        }
+
+        /* Found it */
+        goto found;
+    }
+
+
     for (pirq = MAX_XEN_PIRQ - 1; pirq >= GSI_NUM_PINS; pirq--) {
         /* Skip whole words at a time when they're full */
         if (pirq_inuse_word(s, pirq) == UINT64_MAX) {
@@ -1513,6 +1587,174 @@ bool xen_evtchn_set_gsi(int gsi, int level)
     return true;
 }
 
+static uint32_t msi_pirq_target(uint64_t addr, uint32_t data)
+{
+    /* The vector (in low 8 bits of data) must be zero */
+    if (data & 0xff)
+        return 0;
+
+    uint32_t pirq = (addr & 0xff000) >> 12;
+    pirq |= (addr >> 32) & 0xffffff00;
+
+    return pirq;
+}
+
+static void do_remove_pci_vector(XenEvtchnState *s, PCIDevice *dev, int vector,
+                                 int except_pirq)
+{
+    uint32_t pirq;
+
+    for (pirq = 0; pirq < MAX_XEN_PIRQ; pirq++) {
+        /*
+         * We could be cleverer here, but it isn't really a fast path, and
+         * this trivial optimisation is enough to let us skip the big gap
+         * in the middle a bit quicker (in terms of both loop iterations,
+         * and cache lines).
+         */
+        if (!(pirq & 63) && !(pirq_inuse_word(s, pirq))) {
+            pirq += 64;
+            continue;
+        }
+        if (except_pirq && pirq == except_pirq) {
+            continue;
+        }
+        if (s->pirq[pirq].dev != dev) {
+            continue;
+        }
+        if (vector != -1 && s->pirq[pirq].vector != vector) {
+            continue;
+        }
+
+        /* It could theoretically be bound to a port already, but that is OK. */
+        s->pirq[pirq].dev = dev;
+        s->pirq[pirq].gsi = IRQ_UNBOUND;
+        s->pirq[pirq].is_msix = false;
+        s->pirq[pirq].vector = 0;
+        s->pirq[pirq].is_masked = false;
+        s->pirq[pirq].is_translated = false;
+    }
+}
+
+void xen_evtchn_remove_pci_device(PCIDevice *dev)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+
+    if (!s) {
+        return;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+    do_remove_pci_vector(s, dev, -1, 0);
+}
+
+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
+                          uint64_t addr, uint32_t data, bool is_masked)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    uint32_t pirq;
+
+    if (!s) {
+        return;
+    }
+
+    assert(qemu_mutex_iothread_locked());
+
+    pirq = msi_pirq_target(addr, data);
+
+    /*
+     * The PIRQ# must be sane, and there must be an allocated PIRQ in
+     * IRQ_UNBOUND or IRQ_MSI_EMU state to match it.
+     */
+    if (!pirq || pirq >= MAX_XEN_PIRQ || !pirq_inuse(s, pirq) ||
+        (s->pirq[pirq].gsi != IRQ_UNBOUND &&
+         s->pirq[pirq].gsi != IRQ_MSI_EMU)) {
+        pirq = 0;
+    }
+
+    if (pirq) {
+        s->pirq[pirq].dev = dev;
+        s->pirq[pirq].gsi = IRQ_MSI_EMU;
+        s->pirq[pirq].is_msix = is_msix;
+        s->pirq[pirq].vector = vector;
+        s->pirq[pirq].is_masked = is_masked;
+    }
+
+    /* Remove any (other) entries for this {device, vector} */
+    do_remove_pci_vector(s, dev, vector, pirq);
+}
+
+bool xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
+                                   uint64_t address, uint32_t data)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    uint32_t pirq, port;
+    CPUState *cpu;
+
+    if (!s) {
+        return false;
+    }
+
+    assert(qemu_mutex_iothread_locked());
+
+    pirq = msi_pirq_target(address, data);
+    if (!pirq || pirq >= MAX_XEN_PIRQ) {
+        return false;
+    }
+
+    if (s->pirq[pirq].gsi != IRQ_MSI_EMU) {
+        return false;
+    }
+
+    /* Remember that KVM tried to translate this. It might need to try again. */
+    s->pirq[pirq].is_translated = true;
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    port = s->pirq[pirq].port;
+    if (!valid_port(port)) {
+        return false;
+    }
+
+    cpu = qemu_get_cpu(s->port_table[port].vcpu);
+    if (!cpu) {
+        return false;
+    }
+
+    route->type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+    route->u.xen_evtchn.port = port;
+    route->u.xen_evtchn.vcpu = kvm_arch_vcpu_id(cpu);
+    route->u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+    return true;
+}
+
+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data)
+{
+    XenEvtchnState *s = xen_evtchn_singleton;
+    uint32_t pirq, port;
+
+    if (!s) {
+        return false;
+    }
+
+    assert(qemu_mutex_iothread_locked());
+
+    pirq = msi_pirq_target(address, data);
+    if (!pirq || pirq >= MAX_XEN_PIRQ) {
+        return false;
+    }
+
+    QEMU_LOCK_GUARD(&s->port_lock);
+
+    port = s->pirq[pirq].port;
+    if (!valid_port(port)) {
+        return false;
+    }
+
+    set_port_pending(s, port);
+    return true;
+}
+
 int xen_physdev_map_pirq(struct physdev_map_pirq *map)
 {
     XenEvtchnState *s = xen_evtchn_singleton;
@@ -1523,6 +1765,7 @@ int xen_physdev_map_pirq(struct physdev_map_pirq *map)
         return -ENOTSUP;
     }
 
+    QEMU_IOTHREAD_LOCK_GUARD();
     QEMU_LOCK_GUARD(&s->port_lock);
 
     if (map->domid != DOMID_SELF && map->domid != xen_domid) {
@@ -1578,9 +1821,11 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
     if (pirq < 0 || pirq >= MAX_XEN_PIRQ)
         return -EINVAL;
 
-    QEMU_LOCK_GUARD(&s->port_lock);
+    QEMU_IOTHREAD_LOCK_GUARD();
+    qemu_mutex_lock(&s->port_lock);
 
     if (!pirq_inuse(s, pirq)) {
+        qemu_mutex_unlock(&s->port_lock);
         return -ENOENT;
     }
 
@@ -1588,6 +1833,7 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
 
     /* We can only unmap GSI PIRQs */
     if (gsi < 0) {
+        qemu_mutex_unlock(&s->port_lock);
         return -EINVAL;
     }
 
@@ -1596,6 +1842,12 @@ int xen_physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
     pirq_inuse_word(s, pirq) &= ~pirq_inuse_bit(pirq);
 
     trace_kvm_xen_unmap_pirq(pirq, gsi);
+    qemu_mutex_unlock(&s->port_lock);
+
+    if (gsi == IRQ_MSI_EMU) {
+        kvm_update_msi_routes_all(NULL, true, 0, 0);
+    }
+
     return 0;
 }
 
@@ -1609,6 +1861,7 @@ int xen_physdev_eoi_pirq(struct physdev_eoi *eoi)
         return -ENOTSUP;
     }
 
+    QEMU_IOTHREAD_LOCK_GUARD();
     QEMU_LOCK_GUARD(&s->port_lock);
 
     if (!pirq_inuse(s, pirq)) {
@@ -1638,6 +1891,7 @@ int xen_physdev_query_pirq(struct physdev_irq_status_query *query)
         return -ENOTSUP;
     }
 
+    QEMU_IOTHREAD_LOCK_GUARD();
     QEMU_LOCK_GUARD(&s->port_lock);
 
     if (!pirq_inuse(s, pirq)) {
diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
index dba9d6b021..2bc8300e2e 100644
--- a/hw/i386/kvm/xen_evtchn.h
+++ b/hw/i386/kvm/xen_evtchn.h
@@ -25,6 +25,14 @@ void xen_evtchn_set_callback_level(int level);
 int xen_evtchn_set_port(uint16_t port);
 
 bool xen_evtchn_set_gsi(int gsi, int level);
+void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
+                          uint64_t addr, uint32_t data, bool is_masked);
+void xen_evtchn_remove_pci_device(PCIDevice *dev);
+struct kvm_irq_routing_entry;
+bool xen_evtchn_translate_pirq_msi(struct kvm_irq_routing_entry *route,
+                                   uint64_t address, uint32_t data);
+bool xen_evtchn_deliver_pirq_msi(uint64_t address, uint32_t data);
+
 
 /*
  * These functions mirror the libxenevtchn library API, providing the QEMU
diff --git a/hw/pci/msi.c b/hw/pci/msi.c
index 1cadf150bc..041b0bdbec 100644
--- a/hw/pci/msi.c
+++ b/hw/pci/msi.c
@@ -24,6 +24,8 @@
 #include "qemu/range.h"
 #include "qapi/error.h"
 
+#include "hw/i386/kvm/xen_evtchn.h"
+
 /* PCI_MSI_ADDRESS_LO */
 #define PCI_MSI_ADDRESS_LO_MASK         (~0x3)
 
@@ -414,6 +416,15 @@ void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len)
     fprintf(stderr, "\n");
 #endif
 
+    if (xen_mode == XEN_EMULATE) {
+        for (vector = 0; vector < msi_nr_vectors(flags); vector++) {
+            MSIMessage msg = msi_prepare_message(dev, vector);
+
+            xen_evtchn_snoop_msi(dev, false, vector, msg.address, msg.data,
+                                 msi_is_masked(dev, vector));
+        }
+    }
+
     if (!(flags & PCI_MSI_FLAGS_ENABLE)) {
         return;
     }
diff --git a/hw/pci/msix.c b/hw/pci/msix.c
index 9e70fcd6fa..ee2004b0b1 100644
--- a/hw/pci/msix.c
+++ b/hw/pci/msix.c
@@ -26,6 +26,8 @@
 #include "qapi/error.h"
 #include "trace.h"
 
+#include "hw/i386/kvm/xen_evtchn.h"
+
 /* MSI enable bit and maskall bit are in byte 1 in FLAGS register */
 #define MSIX_CONTROL_OFFSET (PCI_MSIX_FLAGS + 1)
 #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
@@ -124,6 +126,11 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
 {
     bool is_masked = msix_is_masked(dev, vector);
 
+    if (xen_mode == XEN_EMULATE) {
+        MSIMessage msg = msix_prepare_message(dev, vector);
+        xen_evtchn_snoop_msi(dev, true, vector, msg.address, msg.data, is_masked);
+    }
+
     if (is_masked == was_masked) {
         return;
     }
diff --git a/hw/pci/pci.c b/hw/pci/pci.c
index 39a7bb32aa..23c8fc00ae 100644
--- a/hw/pci/pci.c
+++ b/hw/pci/pci.c
@@ -49,6 +49,9 @@
 #include "qemu/cutils.h"
 #include "pci-internal.h"
 
+#include "hw/xen/xen.h"
+#include "hw/i386/kvm/xen_evtchn.h"
+
 //#define DEBUG_PCI
 #ifdef DEBUG_PCI
 # define PCI_DPRINTF(format, ...)       printf(format, ## __VA_ARGS__)
@@ -319,6 +322,17 @@ static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
 {
     MemTxAttrs attrs = {};
 
+    /*
+     * Xen uses the high bits of the address to contain some of the bits
+     * of the PIRQ#. Therefore we can't just send the write cycle and
+     * trust that it's caught by the APIC at 0xfee00000 because the
+     * target of the write might be e.g. 0x0x1000fee46000 for PIRQ#4166.
+     * So we intercept the delivery here instead of in kvm_send_msi().
+     */
+    if (xen_mode == XEN_EMULATE &&
+        xen_evtchn_deliver_pirq_msi(msg.address, msg.data)) {
+        return;
+    }
     attrs.requester_id = pci_requester_id(dev);
     address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
                          attrs, NULL);
@@ -988,6 +1002,9 @@ static void do_pci_unregister_device(PCIDevice *pci_dev)
     pci_get_bus(pci_dev)->devices[pci_dev->devfn] = NULL;
     pci_config_free(pci_dev);
 
+    if (xen_mode == XEN_EMULATE) {
+        xen_evtchn_remove_pci_device(pci_dev);
+    }
     if (memory_region_is_mapped(&pci_dev->bus_master_enable_region)) {
         memory_region_del_subregion(&pci_dev->bus_master_container_region,
                                     &pci_dev->bus_master_enable_region);
diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h
index ee8ee469a6..abcfd13925 100644
--- a/include/hw/pci/msi.h
+++ b/include/hw/pci/msi.h
@@ -33,6 +33,7 @@ extern bool msi_nonbroken;
 void msi_set_message(PCIDevice *dev, MSIMessage msg);
 MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector);
 bool msi_enabled(const PCIDevice *dev);
+void msi_set_enabled(PCIDevice *dev);
 int msi_init(struct PCIDevice *dev, uint8_t offset,
              unsigned int nr_vectors, bool msi64bit,
              bool msi_per_vector_mask, Error **errp);
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index 76bdd9d7ea..6d307cdcad 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -43,6 +43,7 @@
 #include "qemu/error-report.h"
 #include "qemu/memalign.h"
 #include "hw/i386/x86.h"
+#include "hw/i386/kvm/xen_evtchn.h"
 #include "hw/i386/pc.h"
 #include "hw/i386/apic.h"
 #include "hw/i386/apic_internal.h"
@@ -5640,6 +5641,13 @@ int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
         }
     }
 
+#ifdef CONFIG_XEN_EMU
+    if (xen_mode == XEN_EMULATE &&
+        xen_evtchn_translate_pirq_msi(route, address, data)) {
+        return 0;
+    }
+#endif
+
     address = kvm_swizzle_msi_ext_dest_id(address);
     route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT;
     route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK;
@@ -5659,8 +5667,8 @@ struct MSIRouteEntry {
 static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \
     QLIST_HEAD_INITIALIZER(msi_route_list);
 
-static void kvm_update_msi_routes_all(void *private, bool global,
-                                      uint32_t index, uint32_t mask)
+void kvm_update_msi_routes_all(void *private, bool global,
+                               uint32_t index, uint32_t mask)
 {
     int cnt = 0, vector;
     MSIRouteEntry *entry;
diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h
index 6a5c24e3dc..e24753abfe 100644
--- a/target/i386/kvm/kvm_i386.h
+++ b/target/i386/kvm/kvm_i386.h
@@ -51,6 +51,8 @@ bool kvm_hv_vpindex_settable(void);
 bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp);
 
 uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
+void kvm_update_msi_routes_all(void *private, bool global,
+                               uint32_t index, uint32_t mask);
 
 bool kvm_enable_sgx_provisioning(KVMState *s);
 void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask);
diff --git a/target/i386/kvm/xen-emu.c b/target/i386/kvm/xen-emu.c
index 03b999d0b5..96b9d7f43d 100644
--- a/target/i386/kvm/xen-emu.c
+++ b/target/i386/kvm/xen-emu.c
@@ -266,7 +266,8 @@ static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
                          1 << XENFEAT_auto_translated_physmap |
                          1 << XENFEAT_supervisor_mode_kernel |
                          1 << XENFEAT_hvm_callback_vector |
-                         1 << XENFEAT_hvm_safe_pvclock;
+                         1 << XENFEAT_hvm_safe_pvclock |
+                         1 << XENFEAT_hvm_pirqs;
         }
 
         err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
-- 
2.39.0