Introduce basic plumbing for vfio-user behind a new
--enable-vfio-user-client option.
We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a
container type for the "IOMMU" type "vfio-iommu-user", and share some
common container code from hw/vfio/container.c.
Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects,
sharing some common code from hw/vfio/pci.c.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
MAINTAINERS | 2 +
hw/vfio-user/container.h | 25 +++
include/hw/vfio/vfio-container-base.h | 1 +
hw/vfio-user/container.c | 212 ++++++++++++++++++++++++++
hw/vfio-user/pci.c | 155 +++++++++++++++++++
hw/meson.build | 1 +
hw/vfio-user/meson.build | 9 ++
meson_options.txt | 2 +
scripts/meson-buildoptions.sh | 4 +
9 files changed, 411 insertions(+)
create mode 100644 hw/vfio-user/container.h
create mode 100644 hw/vfio-user/container.c
create mode 100644 hw/vfio-user/pci.c
create mode 100644 hw/vfio-user/meson.build
diff --git a/MAINTAINERS b/MAINTAINERS
index dbc393989a..328bab8d19 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4252,6 +4252,8 @@ M: John Levon <john.levon@nutanix.com>
M: Thanos Makatos <thanos.makatos@nutanix.com>
S: Supported
F: docs/devel/vfio-user.rst
+F: hw/vfio-user/*
+F: include/hw/vfio-user/*
F: subprojects/libvfio-user
EBPF:
diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
new file mode 100644
index 0000000000..3cd3303e68
--- /dev/null
+++ b/hw/vfio-user/container.h
@@ -0,0 +1,25 @@
+/*
+ * vfio-user specific definitions.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_USER_CONTAINER_H
+#define HW_VFIO_USER_CONTAINER_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "hw/vfio/vfio-container-base.h"
+
+/*
+ * IOMMU container sub-class for vfio-user.
+ *
+ * For now it only embeds the generic VFIOContainerBase; there is no
+ * vfio-user specific state yet.  The typedef for this struct comes from
+ * OBJECT_DECLARE_SIMPLE_TYPE() below, so it is not repeated here.
+ */
+struct VFIOUserContainer {
+    VFIOContainerBase bcontainer;
+};
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER)
+
+#endif /* HW_VFIO_USER_CONTAINER_H */
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 9d37f86115..28899ca0a6 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -109,6 +109,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
+#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
new file mode 100644
index 0000000000..2892845b4f
--- /dev/null
+++ b/hw/vfio-user/container.c
@@ -0,0 +1,212 @@
+/*
+ * Container for vfio-user IOMMU type: rather than communicating with the kernel
+ * vfio driver, we communicate over a socket to a server using the vfio-user
+ * protocol.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include "qemu/osdep.h"
+
+#include "hw/vfio-user/container.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio/vfio-listener.h"
+#include "qapi/error.h"
+#include "trace.h"
+
+/*
+ * DMA unmap hook for the vfio-user container.
+ *
+ * Stub: none of the arguments are looked at and -ENOTSUP is returned
+ * unconditionally.
+ */
+static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
+                               hwaddr iova, ram_addr_t size,
+                               IOMMUTLBEntry *iotlb, bool unmap_all)
+{
+    int ret = -ENOTSUP;
+
+    return ret;
+}
+
+/*
+ * DMA map hook for the vfio-user container.
+ *
+ * Stub: none of the arguments are looked at and -ENOTSUP is returned
+ * unconditionally.
+ */
+static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
+                             ram_addr_t size, void *vaddr, bool readonly,
+                             MemoryRegion *mrp)
+{
+    int ret = -ENOTSUP;
+
+    return ret;
+}
+
+/*
+ * Dirty page tracking start/stop hook.
+ *
+ * Stub: reports ENOTSUP through @errp and returns -ENOTSUP regardless of
+ * @start.
+ */
+static int
+vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+                                  bool start, Error **errp)
+{
+    int ret = -ENOTSUP;
+
+    error_setg_errno(errp, ENOTSUP, "Not supported");
+    return ret;
+}
+
+/*
+ * Dirty bitmap query hook.
+ *
+ * Stub: @vbmap is left untouched; ENOTSUP is reported through @errp and
+ * -ENOTSUP is returned.
+ */
+static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+                                        VFIOBitmap *vbmap, hwaddr iova,
+                                        hwaddr size, Error **errp)
+{
+    int ret = -ENOTSUP;
+
+    error_setg_errno(errp, ENOTSUP, "Not supported");
+    return ret;
+}
+
+/*
+ * Container setup hook.
+ *
+ * Not implemented yet: always sets @errp (ENOTSUP) and reports failure.
+ *
+ * The hook returns bool, so failure has to be "false".  Returning -ENOTSUP
+ * here would convert to "true" and the caller would carry on with @errp
+ * already set.
+ */
+static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
+{
+    error_setg_errno(errp, ENOTSUP, "Not supported");
+    return false;
+}
+
+/*
+ * Instantiate a new TYPE_VFIO_IOMMU_USER object and return it as a
+ * VFIOUserContainer.  @errp is not used by this function.
+ */
+static VFIOUserContainer *vfio_user_create_container(Error **errp)
+{
+    Object *obj = object_new(TYPE_VFIO_IOMMU_USER);
+
+    return VFIO_IOMMU_USER(obj);
+}
+
+/*
+ * Create a vfio-user container and hook it up to address space @as,
+ * mirroring vfio_container_connect() as much as possible.
+ *
+ * Returns the new container on success.  On failure the steps that had
+ * already been performed are undone by the error labels below and NULL is
+ * returned.
+ */
+static VFIOUserContainer *
+vfio_user_container_connect(AddressSpace *as, Error **errp)
+{
+    VFIOAddressSpace *space = vfio_address_space_get(as);
+    VFIOUserContainer *container = vfio_user_create_container(errp);
+    VFIOContainerBase *bcontainer;
+    VFIOIOMMUClass *vioc;
+
+    if (container == NULL) {
+        goto err_put_space;
+    }
+    bcontainer = &container->bcontainer;
+
+    if (!vfio_cpr_register_container(bcontainer, errp)) {
+        goto err_unref_container;
+    }
+
+    /* Every container class is expected to provide a setup hook. */
+    vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+    assert(vioc->setup);
+    if (!vioc->setup(bcontainer, errp)) {
+        goto err_cpr_unregister;
+    }
+
+    vfio_address_space_insert(space, bcontainer);
+
+    if (!vfio_listener_register(bcontainer, errp)) {
+        goto err_listener;
+    }
+
+    bcontainer->initialized = true;
+    return container;
+
+err_listener:
+    vfio_listener_unregister(bcontainer);
+    if (vioc->release) {
+        vioc->release(bcontainer);
+    }
+
+err_cpr_unregister:
+    vfio_cpr_unregister_container(bcontainer);
+
+err_unref_container:
+    object_unref(container);
+
+err_put_space:
+    vfio_address_space_put(space);
+    return NULL;
+}
+
+/*
+ * Tear down a container set up by vfio_user_container_connect():
+ * unregister the listener, call the class release hook if there is one,
+ * unregister from CPR, drop the container reference and finally put the
+ * address space.
+ */
+static void vfio_user_container_disconnect(VFIOUserContainer *container)
+{
+    VFIOContainerBase *bcontainer = &container->bcontainer;
+    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+    VFIOAddressSpace *space;
+
+    vfio_listener_unregister(bcontainer);
+    if (vioc->release) {
+        vioc->release(bcontainer);
+    }
+
+    /* Read the address space before the container reference is dropped. */
+    space = bcontainer->space;
+
+    vfio_cpr_unregister_container(bcontainer);
+    object_unref(container);
+    vfio_address_space_put(space);
+}
+
+/*
+ * Bind @vbasedev to @container.
+ *
+ * The device fd is set to -1 and the device is prepared with a
+ * zero-filled vfio_device_info.  Always returns true; @errp is not used.
+ */
+static bool vfio_user_device_get(VFIOUserContainer *container,
+                                 VFIODevice *vbasedev, Error **errp)
+{
+    VFIOContainerBase *bcontainer = &container->bcontainer;
+    struct vfio_device_info info = { 0 };
+
+    vbasedev->fd = -1;
+    vfio_device_prepare(vbasedev, bcontainer, &info);
+
+    return true;
+}
+
+/*
+ * vfio_user_device_attach: attach a device to a new container.
+ *
+ * A fresh container is connected to @as for each call.  Returns true on
+ * success.  If the container cannot be connected, the error in @errp is
+ * prefixed and false is returned.  @name is currently unused.
+ */
+static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
+                                    AddressSpace *as, Error **errp)
+{
+    VFIOUserContainer *container;
+
+    container = vfio_user_container_connect(as, errp);
+    if (container == NULL) {
+        /*
+         * error_prepend() glues the prefix directly onto the existing
+         * message, so the ": " separator has to be part of the prefix.
+         */
+        error_prepend(errp, "failed to connect proxy: ");
+        return false;
+    }
+
+    return vfio_user_device_get(container, vbasedev, errp);
+}
+
+/*
+ * Detach @vbasedev: undo the device preparation, then disconnect the
+ * vfio-user container that embeds the device's base container.
+ */
+static void vfio_user_device_detach(VFIODevice *vbasedev)
+{
+    VFIOUserContainer *ucontainer;
+
+    /* Look the container up before the device is unprepared. */
+    ucontainer = container_of(vbasedev->bcontainer, VFIOUserContainer,
+                              bcontainer);
+
+    vfio_device_unprepare(vbasedev);
+    vfio_user_container_disconnect(ucontainer);
+}
+
+/*
+ * PCI hot reset hook.
+ *
+ * Nothing to do: ->needs_reset is always false for vfio-user, so this just
+ * reports success (0) without looking at @vbasedev or @single.
+ */
+static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+    int ret = 0;
+
+    return ret;
+}
+
+/*
+ * Class initialiser for TYPE_VFIO_IOMMU_USER: install the vfio-user
+ * implementations of the VFIOIOMMUClass hooks.  @data is unused.
+ */
+static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
+{
+    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+
+    vioc->setup = vfio_user_setup;
+    vioc->dma_map = vfio_user_dma_map;
+    vioc->dma_unmap = vfio_user_dma_unmap;
+    vioc->attach_device = vfio_user_device_attach;
+    vioc->detach_device = vfio_user_device_detach;
+    vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
+    vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
+    vioc->pci_hot_reset = vfio_user_pci_hot_reset;
+}
+
+/* QOM type description for the vfio-user IOMMU container. */
+static const TypeInfo types[] = {
+    {
+        .name          = TYPE_VFIO_IOMMU_USER,
+        .parent        = TYPE_VFIO_IOMMU,
+        .class_init    = vfio_iommu_user_class_init,
+        .instance_size = sizeof(VFIOUserContainer),
+    },
+};
+
+DEFINE_TYPES(types)
diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
new file mode 100644
index 0000000000..74b0c61f9b
--- /dev/null
+++ b/hw/vfio-user/pci.c
@@ -0,0 +1,155 @@
+/*
+ * vfio PCI device over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include "qemu/osdep.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/vfio/pci.h"
+
+#define TYPE_VFIO_USER_PCI "vfio-user-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
+
+struct VFIOUserPCIDevice { /* instance state of a "vfio-user-pci" device */
+ VFIOPCIDevice device; /* embedded VFIO PCI device state */
+ char *sock_name; /* storage for the "socket" property */
+};
+
+/*
+ * Emulated devices don't use host hot reset, so the device is never
+ * flagged as needing one.
+ */
+static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
+{
+    const bool needs_reset = false;
+
+    vbasedev->needs_reset = needs_reset;
+}
+
+/*
+ * Return the QOM object of the VFIOUserPCIDevice that embeds @vbasedev
+ * (as device.vbasedev).
+ */
+static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
+{
+    VFIOUserPCIDevice *udev;
+
+    udev = container_of(vbasedev, VFIOUserPCIDevice, device.vbasedev);
+    return OBJECT(udev);
+}
+
+/*
+ * Device callbacks for vfio-user PCI devices.
+ *
+ * No live migration support yet: .vfio_save_config and .vfio_load_config
+ * are deliberately not set and therefore stay NULL.
+ */
+static VFIODeviceOps vfio_user_pci_ops = {
+    .vfio_get_object          = vfio_user_pci_get_object,
+    .vfio_eoi                 = vfio_pci_intx_eoi,
+    .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
+};
+
+/*
+ * Realize a vfio-user-pci device.
+ *
+ * Fails with an error (plus a usage hint) when no "socket" property was
+ * given.  Otherwise the device is named after the socket, flagged as an
+ * mdev and attached to a TYPE_VFIO_IOMMU_USER container for the device's
+ * IOMMU address space.  Attach failures are reported through @errp with
+ * the usual VFIO message prefix.
+ */
+static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
+{
+    ERRP_GUARD();
+    VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
+    VFIODevice *vbasedev = &VFIO_PCI_BASE(pdev)->vbasedev;
+    AddressSpace *as;
+    bool attached;
+
+    /*
+     * TODO: make option parser understand SocketAddress
+     * and use that instead of having scalar options
+     * for each socket type.
+     */
+    if (udev->sock_name == NULL) {
+        error_setg(errp, "No socket specified");
+        error_append_hint(errp, "Use -device vfio-user-pci,socket=<name>\n");
+        return;
+    }
+
+    vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
+
+    /* vfio-user devices are effectively mdevs (don't use a host iommu). */
+    vbasedev->mdev = true;
+
+    as = pci_device_iommu_address_space(pdev);
+    attached = vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
+                                                vbasedev->name, vbasedev,
+                                                as, errp);
+    if (!attached) {
+        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
+    }
+}
+
+/*
+ * Instance initialiser for vfio-user-pci: add the "bootindex" property,
+ * fill in default field values and initialise the embedded VFIODevice
+ * with the vfio-user PCI ops.
+ */
+static void vfio_user_instance_init(Object *obj)
+{
+    VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+    PCIDevice *pci_dev = PCI_DEVICE(obj);
+
+    device_add_bootindex_property(obj, &vdev->bootindex, "bootindex", NULL,
+                                  &pci_dev->qdev);
+
+    /*
+     * All host address fields are set to all-ones.
+     * NOTE(review): presumably this marks "no host device" - confirm.
+     */
+    vdev->host.domain = ~0U;
+    vdev->host.bus = ~0U;
+    vdev->host.slot = ~0U;
+    vdev->host.function = ~0U;
+
+    vfio_device_init(&vdev->vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
+                     DEVICE(vdev), false);
+
+    vdev->nv_gpudirect_clique = 0xFF;
+
+    /*
+     * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
+     * line, therefore, no need to wait to realize like other devices.
+     */
+    pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+}
+
+/* Instance finaliser: hand the embedded VFIO PCI device to the common put. */
+static void vfio_user_instance_finalize(Object *obj)
+{
+    vfio_pci_put_device(VFIO_PCI_BASE(obj));
+}
+
+static const Property vfio_user_pci_dev_properties[] = { /* qdev properties */
+ DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
+ vendor_id, PCI_ANY_ID), /* the four ID properties default to PCI_ANY_ID */
+ DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
+ device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
+ sub_vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
+ sub_device_id, PCI_ANY_ID),
+ DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name), /* must be set: checked in vfio_user_pci_realize() */
+};
+
+/*
+ * Class initialiser for vfio-user-pci: register the property table, set
+ * the device description and install the realize handler.  @data is
+ * unused.
+ */
+static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
+{
+    PCIDeviceClass *pci_class = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dev_class = DEVICE_CLASS(klass);
+
+    device_class_set_props(dev_class, vfio_user_pci_dev_properties);
+    dev_class->desc = "VFIO over socket PCI device assignment";
+    pci_class->realize = vfio_user_pci_realize;
+}
+
+/* QOM type description for the "vfio-user-pci" device. */
+static const TypeInfo vfio_user_pci_dev_info = {
+    .name              = TYPE_VFIO_USER_PCI,
+    .parent            = TYPE_VFIO_PCI_BASE,
+    .instance_size     = sizeof(VFIOUserPCIDevice),
+    .instance_init     = vfio_user_instance_init,
+    .instance_finalize = vfio_user_instance_finalize,
+    .class_init        = vfio_user_pci_dev_class_init,
+};
+
+/* Register the vfio-user-pci QOM type. */
+static void register_vfio_user_dev_type(void)
+{
+    type_register_static(&vfio_user_pci_dev_info);
+}
+
+type_init(register_vfio_user_dev_type)
diff --git a/hw/meson.build b/hw/meson.build
index b91f761fe0..791ce21ab4 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -39,6 +39,7 @@ subdir('uefi')
subdir('ufs')
subdir('usb')
subdir('vfio')
+subdir('vfio-user')
subdir('virtio')
subdir('vmapple')
subdir('watchdog')
diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
new file mode 100644
index 0000000000..f1fee70c85
--- /dev/null
+++ b/hw/vfio-user/meson.build
@@ -0,0 +1,9 @@
+# Sources of the vfio-user client.
+vfio_user_ss = ss.source_set()
+vfio_user_ss.add(files(
+ 'container.c',
+ 'pci.c',
+))
+
+# Only built when the vfio_user_client feature option is enabled.
+if get_option('vfio_user_client').enabled()
+ specific_ss.add_all(vfio_user_ss)
+endif
diff --git a/meson_options.txt b/meson_options.txt
index a442be2995..97d3db44cd 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -109,6 +109,8 @@ option('multiprocess', type: 'feature', value: 'auto',
description: 'Out of process device emulation support')
option('relocatable', type : 'boolean', value : true,
description: 'toggle relocatable install')
+option('vfio_user_client', type: 'feature', value: 'disabled',
+ description: 'vfio-user client support')
option('vfio_user_server', type: 'feature', value: 'disabled',
description: 'vfio-user server support')
option('dbus_display', type: 'feature', value: 'auto',
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
index f09ef9604f..2c5673769a 100644
--- a/scripts/meson-buildoptions.sh
+++ b/scripts/meson-buildoptions.sh
@@ -201,6 +201,8 @@ meson_options_help() {
printf "%s\n" ' vdi vdi image format support'
printf "%s\n" ' vduse-blk-export'
printf "%s\n" ' VDUSE block export support'
+ printf "%s\n" ' vfio-user-client'
+ printf "%s\n" ' vfio-user client support'
printf "%s\n" ' vfio-user-server'
printf "%s\n" ' vfio-user server support'
printf "%s\n" ' vhdx vhdx image format support'
@@ -529,6 +531,8 @@ _meson_option_parse() {
--disable-vdi) printf "%s" -Dvdi=disabled ;;
--enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
--disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
+ --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;;
+ --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;;
--enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
--disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
--enable-vhdx) printf "%s" -Dvhdx=enabled ;;
--
2.43.0
On 6/7/25 02:10, John Levon wrote:
> Introduce basic plumbing for vfio-user behind a new
> --enable-vfio-user-client option.
>
> We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a
> container type for the "IOMMU" type "vfio-iommu-user", and share some
> common container code from hw/vfio/container.c.
>
> Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects,
> sharing some common code from hw/vfio/pci.c.
>
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> MAINTAINERS | 2 +
> hw/vfio-user/container.h | 25 +++
> include/hw/vfio/vfio-container-base.h | 1 +
> hw/vfio-user/container.c | 212 ++++++++++++++++++++++++++
> hw/vfio-user/pci.c | 155 +++++++++++++++++++
> hw/meson.build | 1 +
> hw/vfio-user/meson.build | 9 ++
> meson_options.txt | 2 +
> scripts/meson-buildoptions.sh | 4 +
> 9 files changed, 411 insertions(+)
> create mode 100644 hw/vfio-user/container.h
> create mode 100644 hw/vfio-user/container.c
> create mode 100644 hw/vfio-user/pci.c
> create mode 100644 hw/vfio-user/meson.build
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index dbc393989a..328bab8d19 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4252,6 +4252,8 @@ M: John Levon <john.levon@nutanix.com>
> M: Thanos Makatos <thanos.makatos@nutanix.com>
> S: Supported
> F: docs/devel/vfio-user.rst
> +F: hw/vfio-user/*
> +F: include/hw/vfio-user/*
> F: subprojects/libvfio-user
>
> EBPF:
> diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
> new file mode 100644
> index 0000000000..3cd3303e68
> --- /dev/null
> +++ b/hw/vfio-user/container.h
> @@ -0,0 +1,25 @@
> +/*
> + * vfio-user specific definitions.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef HW_VFIO_USER_CONTAINER_H
> +#define HW_VFIO_USER_CONTAINER_H
> +
> +#include <inttypes.h>
> +#include <stdbool.h>
> +
> +#include "hw/vfio/vfio-container-base.h"
> +
> +/* MMU container sub-class for vfio-user. */
> +typedef struct VFIOUserContainer {
> + VFIOContainerBase bcontainer;
> +} VFIOUserContainer;
> +
> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
> +
> +#endif /* HW_VFIO_USER_CONTAINER_H */
> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
> index 9d37f86115..28899ca0a6 100644
> --- a/include/hw/vfio/vfio-container-base.h
> +++ b/include/hw/vfio/vfio-container-base.h
> @@ -109,6 +109,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
> #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
> #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
> #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
> +#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
>
> OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
>
> diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
> new file mode 100644
> index 0000000000..2892845b4f
> --- /dev/null
> +++ b/hw/vfio-user/container.c
> @@ -0,0 +1,212 @@
> +/*
> + * Container for vfio-user IOMMU type: rather than communicating with the kernel
> + * vfio driver, we communicate over a socket to a server using the vfio-user
> + * protocol.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <sys/ioctl.h>
> +#include <linux/vfio.h>
> +#include "qemu/osdep.h"
> +
> +#include "hw/vfio-user/container.h"
> +#include "hw/vfio/vfio-cpr.h"
> +#include "hw/vfio/vfio-device.h"
> +#include "hw/vfio/vfio-listener.h"
> +#include "qapi/error.h"
> +#include "trace.h"
> +
> +static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
> + hwaddr iova, ram_addr_t size,
> + IOMMUTLBEntry *iotlb, bool unmap_all)
> +{
> + return -ENOTSUP;
> +}
> +
> +static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
> + ram_addr_t size, void *vaddr, bool readonly,
> + MemoryRegion *mrp)
> +{
> + return -ENOTSUP;
> +}
> +
> +static int
> +vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
> + bool start, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
> + VFIOBitmap *vbmap, hwaddr iova,
> + hwaddr size, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static VFIOUserContainer *vfio_user_create_container(Error **errp)
> +{
> + VFIOUserContainer *container;
> +
> + container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
> + return container;
> +}
> +
> +/*
> + * Try to mirror vfio_container_connect() as much as possible.
> + */
> +static VFIOUserContainer *
> +vfio_user_container_connect(AddressSpace *as, Error **errp)
> +{
> + VFIOContainerBase *bcontainer;
> + VFIOUserContainer *container;
> + VFIOAddressSpace *space;
> + VFIOIOMMUClass *vioc;
> +
> + space = vfio_address_space_get(as);
> +
> + container = vfio_user_create_container(errp);
> + if (!container) {
> + goto put_space_exit;
> + }
> +
> + bcontainer = &container->bcontainer;
> +
> + if (!vfio_cpr_register_container(bcontainer, errp)) {
> + goto free_container_exit;
> + }
> +
> + vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
> + assert(vioc->setup);
> +
> + if (!vioc->setup(bcontainer, errp)) {
> + goto unregister_container_exit;
> + }
> +
> + vfio_address_space_insert(space, bcontainer);
> +
> + if (!vfio_listener_register(bcontainer, errp)) {
> + goto listener_release_exit;
> + }
> +
> + bcontainer->initialized = true;
> +
> + return container;
> +
> +listener_release_exit:
> + vfio_listener_unregister(bcontainer);
> + if (vioc->release) {
> + vioc->release(bcontainer);
> + }
> +
> +unregister_container_exit:
> + vfio_cpr_unregister_container(bcontainer);
> +
> +free_container_exit:
> + object_unref(container);
> +
> +put_space_exit:
> + vfio_address_space_put(space);
> +
> + return NULL;
> +}
> +
> +static void vfio_user_container_disconnect(VFIOUserContainer *container)
> +{
> + VFIOContainerBase *bcontainer = &container->bcontainer;
> + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
> +
> + vfio_listener_unregister(bcontainer);
> + if (vioc->release) {
> + vioc->release(bcontainer);
> + }
> +
> + VFIOAddressSpace *space = bcontainer->space;
> +
> + vfio_cpr_unregister_container(bcontainer);
> + object_unref(container);
> +
> + vfio_address_space_put(space);
> +}
> +
> +static bool vfio_user_device_get(VFIOUserContainer *container,
> + VFIODevice *vbasedev, Error **errp)
> +{
> + struct vfio_device_info info = { 0 };
> +
> + vbasedev->fd = -1;
> +
> + vfio_device_prepare(vbasedev, &container->bcontainer, &info);
> +
> + return true;
> +}
> +
> +/*
> + * vfio_user_device_attach: attach a device to a new container.
> + */
> +static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
> + AddressSpace *as, Error **errp)
> +{
> + VFIOUserContainer *container;
> +
> + container = vfio_user_container_connect(as, errp);
> + if (container == NULL) {
> + error_prepend(errp, "failed to connect proxy");
> + return false;
> + }
> +
> + return vfio_user_device_get(container, vbasedev, errp);
> +}
> +
> +static void vfio_user_device_detach(VFIODevice *vbasedev)
> +{
> + VFIOUserContainer *container = container_of(vbasedev->bcontainer,
> + VFIOUserContainer, bcontainer);
> +
> + vfio_device_unprepare(vbasedev);
> +
> + vfio_user_container_disconnect(container);
> +}
> +
> +static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
> +{
> + /* ->needs_reset is always false for vfio-user. */
> + return 0;
> +}
> +
> +static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
> +{
> + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
> +
> + vioc->setup = vfio_user_setup;
> + vioc->dma_map = vfio_user_dma_map;
> + vioc->dma_unmap = vfio_user_dma_unmap;
> + vioc->attach_device = vfio_user_device_attach;
> + vioc->detach_device = vfio_user_device_detach;
> + vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
> + vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
> + vioc->pci_hot_reset = vfio_user_pci_hot_reset;
> +};
> +
> +static const TypeInfo types[] = {
> + {
> + .name = TYPE_VFIO_IOMMU_USER,
> + .parent = TYPE_VFIO_IOMMU,
> + .instance_size = sizeof(VFIOUserContainer),
> + .class_init = vfio_iommu_user_class_init,
> + },
> +};
> +
> +DEFINE_TYPES(types)
> diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
> new file mode 100644
> index 0000000000..74b0c61f9b
> --- /dev/null
> +++ b/hw/vfio-user/pci.c
> @@ -0,0 +1,155 @@
> +/*
> + * vfio PCI device over a UNIX socket.
> + *
> + * Copyright © 2018, 2021 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <sys/ioctl.h>
> +#include "qemu/osdep.h"
> +
> +#include "hw/qdev-properties.h"
> +#include "hw/vfio/pci.h"
> +
> +#define TYPE_VFIO_USER_PCI "vfio-user-pci"
> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
> +
> +struct VFIOUserPCIDevice {
> + VFIOPCIDevice device;
> + char *sock_name;
> +};
> +
> +/*
> + * Emulated devices don't use host hot reset
> + */
> +static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
> +{
> + vbasedev->needs_reset = false;
> +}
> +
> +static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
> +{
> + VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice,
> + device.vbasedev);
> +
> + return OBJECT(vdev);
> +}
> +
> +static VFIODeviceOps vfio_user_pci_ops = {
> + .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
> + .vfio_eoi = vfio_pci_intx_eoi,
> + .vfio_get_object = vfio_user_pci_get_object,
> + /* No live migration support yet. */
> + .vfio_save_config = NULL,
> + .vfio_load_config = NULL,
> +};
> +
> +static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
> +{
> + ERRP_GUARD();
> + VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + AddressSpace *as;
> +
> + /*
> + * TODO: make option parser understand SocketAddress
> + * and use that instead of having scalar options
> + * for each socket type.
Please take a look at vfu_object_class_init() to handle the
"socket" property.
C.
> + */
> + if (!udev->sock_name) {
> + error_setg(errp, "No socket specified");
> + error_append_hint(errp, "Use -device vfio-user-pci,socket=<name>\n");
> + return;
> + }
> +
> + vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
> +
> + /*
> + * vfio-user devices are effectively mdevs (don't use a host iommu).
> + */
> + vbasedev->mdev = true;
> +
> + as = pci_device_iommu_address_space(pdev);
> + if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
> + vbasedev->name, vbasedev,
> + as, errp)) {
> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
> + return;
> + }
> +}
> +
> +static void vfio_user_instance_init(Object *obj)
> +{
> + PCIDevice *pci_dev = PCI_DEVICE(obj);
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> +
> + device_add_bootindex_property(obj, &vdev->bootindex,
> + "bootindex", NULL,
> + &pci_dev->qdev);
> + vdev->host.domain = ~0U;
> + vdev->host.bus = ~0U;
> + vdev->host.slot = ~0U;
> + vdev->host.function = ~0U;
> +
> + vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
> + DEVICE(vdev), false);
> +
> + vdev->nv_gpudirect_clique = 0xFF;
> +
> + /*
> + * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
> + * line, therefore, no need to wait to realize like other devices.
> + */
> + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
> +}
> +
> +static void vfio_user_instance_finalize(Object *obj)
> +{
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> +
> + vfio_pci_put_device(vdev);
> +}
> +
> +static const Property vfio_user_pci_dev_properties[] = {
> + DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
> + vendor_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
> + device_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
> + sub_vendor_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
> + sub_device_id, PCI_ANY_ID),
> + DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
> +};
> +
> +static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(klass);
> + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
> +
> + device_class_set_props(dc, vfio_user_pci_dev_properties);
> + dc->desc = "VFIO over socket PCI device assignment";
> + pdc->realize = vfio_user_pci_realize;
> +}
> +
> +static const TypeInfo vfio_user_pci_dev_info = {
> + .name = TYPE_VFIO_USER_PCI,
> + .parent = TYPE_VFIO_PCI_BASE,
> + .instance_size = sizeof(VFIOUserPCIDevice),
> + .class_init = vfio_user_pci_dev_class_init,
> + .instance_init = vfio_user_instance_init,
> + .instance_finalize = vfio_user_instance_finalize,
> +};
> +
> +static void register_vfio_user_dev_type(void)
> +{
> + type_register_static(&vfio_user_pci_dev_info);
> +}
> +
> + type_init(register_vfio_user_dev_type)
> diff --git a/hw/meson.build b/hw/meson.build
> index b91f761fe0..791ce21ab4 100644
> --- a/hw/meson.build
> +++ b/hw/meson.build
> @@ -39,6 +39,7 @@ subdir('uefi')
> subdir('ufs')
> subdir('usb')
> subdir('vfio')
> +subdir('vfio-user')
> subdir('virtio')
> subdir('vmapple')
> subdir('watchdog')
> diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
> new file mode 100644
> index 0000000000..f1fee70c85
> --- /dev/null
> +++ b/hw/vfio-user/meson.build
> @@ -0,0 +1,9 @@
> +vfio_user_ss = ss.source_set()
> +vfio_user_ss.add(files(
> + 'container.c',
> + 'pci.c',
> +))
> +
> +if get_option('vfio_user_client').enabled()
> + specific_ss.add_all(vfio_user_ss)
> +endif
> diff --git a/meson_options.txt b/meson_options.txt
> index a442be2995..97d3db44cd 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -109,6 +109,8 @@ option('multiprocess', type: 'feature', value: 'auto',
> description: 'Out of process device emulation support')
> option('relocatable', type : 'boolean', value : true,
> description: 'toggle relocatable install')
> +option('vfio_user_client', type: 'feature', value: 'disabled',
> + description: 'vfio-user client support')
> option('vfio_user_server', type: 'feature', value: 'disabled',
> description: 'vfio-user server support')
> option('dbus_display', type: 'feature', value: 'auto',
> diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
> index f09ef9604f..2c5673769a 100644
> --- a/scripts/meson-buildoptions.sh
> +++ b/scripts/meson-buildoptions.sh
> @@ -201,6 +201,8 @@ meson_options_help() {
> printf "%s\n" ' vdi vdi image format support'
> printf "%s\n" ' vduse-blk-export'
> printf "%s\n" ' VDUSE block export support'
> + printf "%s\n" ' vfio-user-client'
> + printf "%s\n" ' vfio-user client support'
> printf "%s\n" ' vfio-user-server'
> printf "%s\n" ' vfio-user server support'
> printf "%s\n" ' vhdx vhdx image format support'
> @@ -529,6 +531,8 @@ _meson_option_parse() {
> --disable-vdi) printf "%s" -Dvdi=disabled ;;
> --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
> --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
> + --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;;
> + --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;;
> --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
> --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
> --enable-vhdx) printf "%s" -Dvhdx=enabled ;;
On Thu, Jun 12, 2025 at 08:39:17AM +0200, Cédric Le Goater wrote:
> > +static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
> > +{
> > + ERRP_GUARD();
> > + VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
> > + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> > + VFIODevice *vbasedev = &vdev->vbasedev;
> > + AddressSpace *as;
> > +
> > + /*
> > + * TODO: make option parser understand SocketAddress
> > + * and use that instead of having scalar options
> > + * for each socket type.
>
> Please take a look at vfu_object_class_init() to handle the
> "socket" property.
Thanks for the pointer: done. I had to switch to using JSON-style device
specification for this though, so updated docs too.
regards
john
On 07/06/2025 01:10, John Levon wrote:
> Introduce basic plumbing for vfio-user behind a new
> --enable-vfio-user-client option.
>
> We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a
> container type for the "IOMMU" type "vfio-iommu-user", and share some
> common container code from hw/vfio/container.c.
>
> Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects,
> sharing some common code from hw/vfio/pci.c.
>
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> MAINTAINERS | 2 +
> hw/vfio-user/container.h | 25 +++
> include/hw/vfio/vfio-container-base.h | 1 +
> hw/vfio-user/container.c | 212 ++++++++++++++++++++++++++
> hw/vfio-user/pci.c | 155 +++++++++++++++++++
> hw/meson.build | 1 +
> hw/vfio-user/meson.build | 9 ++
> meson_options.txt | 2 +
> scripts/meson-buildoptions.sh | 4 +
> 9 files changed, 411 insertions(+)
> create mode 100644 hw/vfio-user/container.h
> create mode 100644 hw/vfio-user/container.c
> create mode 100644 hw/vfio-user/pci.c
> create mode 100644 hw/vfio-user/meson.build
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index dbc393989a..328bab8d19 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4252,6 +4252,8 @@ M: John Levon <john.levon@nutanix.com>
> M: Thanos Makatos <thanos.makatos@nutanix.com>
> S: Supported
> F: docs/devel/vfio-user.rst
> +F: hw/vfio-user/*
> +F: include/hw/vfio-user/*
> F: subprojects/libvfio-user
Question: how do you see the division between hw/vfio and hw/vfio-user?
My initial feeling is that there is substantial sharing between the two,
in which case I'd expect the files to be in hw/vfio as e.g.
hw/vfio/container-user.c etc. instead of its own directory.
Cédric, what are your thoughts here?
> EBPF:
> diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
> new file mode 100644
> index 0000000000..3cd3303e68
> --- /dev/null
> +++ b/hw/vfio-user/container.h
> @@ -0,0 +1,25 @@
> +/*
> + * vfio-user specific definitions.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef HW_VFIO_USER_CONTAINER_H
> +#define HW_VFIO_USER_CONTAINER_H
> +
> +#include <inttypes.h>
> +#include <stdbool.h>
These shouldn't be included directly: you probably just want:
#include "qemu/osdep.h"
instead.
> +#include "hw/vfio/vfio-container-base.h"
> +
> +/* MMU container sub-class for vfio-user. */
> +typedef struct VFIOUserContainer {
> + VFIOContainerBase bcontainer;
> +} VFIOUserContainer;
> +
> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
As per the documentation at
https://qemu-project.gitlab.io/qemu/devel/style.html#qemu-object-model-declarations
the parent object should always be named parent_obj, and the struct
shouldn't have a typedef, i.e.
/* MMU container sub-class for vfio-user. */
struct VFIOUserContainer {
VFIOContainerBase parent_obj;
};
OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
> +#endif /* HW_VFIO_USER_CONTAINER_H */
> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
> index 9d37f86115..28899ca0a6 100644
> --- a/include/hw/vfio/vfio-container-base.h
> +++ b/include/hw/vfio/vfio-container-base.h
> @@ -109,6 +109,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
> #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
> #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
> #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
> +#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
>
> OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
>
> diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
> new file mode 100644
> index 0000000000..2892845b4f
> --- /dev/null
> +++ b/hw/vfio-user/container.c
> @@ -0,0 +1,212 @@
> +/*
> + * Container for vfio-user IOMMU type: rather than communicating with the kernel
> + * vfio driver, we communicate over a socket to a server using the vfio-user
> + * protocol.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <sys/ioctl.h>
> +#include <linux/vfio.h>
> +#include "qemu/osdep.h"
> +
> +#include "hw/vfio-user/container.h"
> +#include "hw/vfio/vfio-cpr.h"
> +#include "hw/vfio/vfio-device.h"
> +#include "hw/vfio/vfio-listener.h"
> +#include "qapi/error.h"
> +#include "trace.h"
> +
> +static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
> + hwaddr iova, ram_addr_t size,
> + IOMMUTLBEntry *iotlb, bool unmap_all)
> +{
> + return -ENOTSUP;
> +}
> +
> +static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
> + ram_addr_t size, void *vaddr, bool readonly,
> + MemoryRegion *mrp)
> +{
> + return -ENOTSUP;
> +}
> +
> +static int
> +vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
> + bool start, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
> + VFIOBitmap *vbmap, hwaddr iova,
> + hwaddr size, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static VFIOUserContainer *vfio_user_create_container(Error **errp)
> +{
> + VFIOUserContainer *container;
> +
> + container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
> + return container;
> +}
> +
> +/*
> + * Try to mirror vfio_container_connect() as much as possible.
> + */
> +static VFIOUserContainer *
> +vfio_user_container_connect(AddressSpace *as, Error **errp)
> +{
> + VFIOContainerBase *bcontainer;
> + VFIOUserContainer *container;
> + VFIOAddressSpace *space;
> + VFIOIOMMUClass *vioc;
> +
> + space = vfio_address_space_get(as);
> +
> + container = vfio_user_create_container(errp);
> + if (!container) {
> + goto put_space_exit;
> + }
> +
> + bcontainer = &container->bcontainer;
References to the object hierarchy should always be done with the
automatically generated QOM cast macros since they are easier to read,
and also ensure type safety e.g.:
bcontainer = VFIO_IOMMU(container);
> + if (!vfio_cpr_register_container(bcontainer, errp)) {
> + goto free_container_exit;
> + }
> +
> + vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
> + assert(vioc->setup);
> +
> + if (!vioc->setup(bcontainer, errp)) {
> + goto unregister_container_exit;
> + }
> +
> + vfio_address_space_insert(space, bcontainer);
> +
> + if (!vfio_listener_register(bcontainer, errp)) {
> + goto listener_release_exit;
> + }
> +
> + bcontainer->initialized = true;
> +
> + return container;
> +
> +listener_release_exit:
> + vfio_listener_unregister(bcontainer);
> + if (vioc->release) {
> + vioc->release(bcontainer);
> + }
> +
> +unregister_container_exit:
> + vfio_cpr_unregister_container(bcontainer);
> +
> +free_container_exit:
> + object_unref(container);
> +
> +put_space_exit:
> + vfio_address_space_put(space);
> +
> + return NULL;
> +}
> +
> +static void vfio_user_container_disconnect(VFIOUserContainer *container)
> +{
> + VFIOContainerBase *bcontainer = &container->bcontainer;
Use a QOM cast here.
> + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
> +
> + vfio_listener_unregister(bcontainer);
> + if (vioc->release) {
> + vioc->release(bcontainer);
> + }
> +
> + VFIOAddressSpace *space = bcontainer->space;
> +
> + vfio_cpr_unregister_container(bcontainer);
> + object_unref(container);
> +
> + vfio_address_space_put(space);
> +}
> +
> +static bool vfio_user_device_get(VFIOUserContainer *container,
> + VFIODevice *vbasedev, Error **errp)
> +{
> + struct vfio_device_info info = { 0 };
> +
> + vbasedev->fd = -1;
> +
> + vfio_device_prepare(vbasedev, &container->bcontainer, &info);
Use a QOM cast here.
> + return true;
> +}
> +
> +/*
> + * vfio_user_device_attach: attach a device to a new container.
> + */
> +static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
> + AddressSpace *as, Error **errp)
> +{
> + VFIOUserContainer *container;
> +
> + container = vfio_user_container_connect(as, errp);
> + if (container == NULL) {
> + error_prepend(errp, "failed to connect proxy");
> + return false;
> + }
> +
> + return vfio_user_device_get(container, vbasedev, errp);
> +}
> +
> +static void vfio_user_device_detach(VFIODevice *vbasedev)
> +{
> + VFIOUserContainer *container = container_of(vbasedev->bcontainer,
> + VFIOUserContainer, bcontainer);
> +
> + vfio_device_unprepare(vbasedev);
> +
> + vfio_user_container_disconnect(container);
> +}
> +
> +static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
> +{
> + /* ->needs_reset is always false for vfio-user. */
> + return 0;
> +}
> +
> +static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
> +{
> + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
> +
> + vioc->setup = vfio_user_setup;
> + vioc->dma_map = vfio_user_dma_map;
> + vioc->dma_unmap = vfio_user_dma_unmap;
> + vioc->attach_device = vfio_user_device_attach;
> + vioc->detach_device = vfio_user_device_detach;
> + vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
> + vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
> + vioc->pci_hot_reset = vfio_user_pci_hot_reset;
> +};
> +
> +static const TypeInfo types[] = {
> + {
> + .name = TYPE_VFIO_IOMMU_USER,
> + .parent = TYPE_VFIO_IOMMU,
> + .instance_size = sizeof(VFIOUserContainer),
> + .class_init = vfio_iommu_user_class_init,
> + },
> +};
> +
> +DEFINE_TYPES(types)
> diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
> new file mode 100644
> index 0000000000..74b0c61f9b
> --- /dev/null
> +++ b/hw/vfio-user/pci.c
> @@ -0,0 +1,155 @@
> +/*
> + * vfio PCI device over a UNIX socket.
> + *
> + * Copyright © 2018, 2021 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <sys/ioctl.h>
> +#include "qemu/osdep.h"
> +
> +#include "hw/qdev-properties.h"
> +#include "hw/vfio/pci.h"
> +
> +#define TYPE_VFIO_USER_PCI "vfio-user-pci"
> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
> +
> +struct VFIOUserPCIDevice {
> + VFIOPCIDevice device;
> + char *sock_name;
> +};
Again, as per the documentation link above, device should be called
parent_obj, and there should be an empty line between parent_obj and the
other members, i.e.
struct VFIOUserPCIDevice {
VFIOPCIDevice parent_obj;

char *sock_name;
};
Note that by using QOM casts the name of the parent object member is not
exposed to the remainder of the code.
> +/*
> + * Emulated devices don't use host hot reset
> + */
> +static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
> +{
> + vbasedev->needs_reset = false;
> +}
> +
> +static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
> +{
> + VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice,
> + device.vbasedev);
Hmmm. Not for this series but this may suggest VFIODevice should be
QOMified.
> + return OBJECT(vdev);
> +}
> +
> +static VFIODeviceOps vfio_user_pci_ops = {
> + .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
> + .vfio_eoi = vfio_pci_intx_eoi,
> + .vfio_get_object = vfio_user_pci_get_object,
> + /* No live migration support yet. */
> + .vfio_save_config = NULL,
> + .vfio_load_config = NULL,
> +};
> +
> +static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
> +{
> + ERRP_GUARD();
> + VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + AddressSpace *as;
> +
> + /*
> + * TODO: make option parser understand SocketAddress
> + * and use that instead of having scalar options
> + * for each socket type.
> + */
> + if (!udev->sock_name) {
> + error_setg(errp, "No socket specified");
> + error_append_hint(errp, "Use -device vfio-user-pci,socket=<name>\n");
> + return;
> + }
> +
> + vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
> +
> + /*
> + * vfio-user devices are effectively mdevs (don't use a host iommu).
> + */
> + vbasedev->mdev = true;
> +
> + as = pci_device_iommu_address_space(pdev);
> + if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
> + vbasedev->name, vbasedev,
> + as, errp)) {
> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
> + return;
> + }
> +}
> +
> +static void vfio_user_instance_init(Object *obj)
> +{
> + PCIDevice *pci_dev = PCI_DEVICE(obj);
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> +
> + device_add_bootindex_property(obj, &vdev->bootindex,
> + "bootindex", NULL,
> + &pci_dev->qdev);
> + vdev->host.domain = ~0U;
> + vdev->host.bus = ~0U;
> + vdev->host.slot = ~0U;
> + vdev->host.function = ~0U;
> +
> + vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
> + DEVICE(vdev), false);
> +
> + vdev->nv_gpudirect_clique = 0xFF;
> +
> + /*
> + * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
> + * line, therefore, no need to wait to realize like other devices.
> + */
> + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
> +}
> +
> +static void vfio_user_instance_finalize(Object *obj)
> +{
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> +
> + vfio_pci_put_device(vdev);
> +}
> +
> +static const Property vfio_user_pci_dev_properties[] = {
> + DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
> + vendor_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
> + device_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
> + sub_vendor_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
> + sub_device_id, PCI_ANY_ID),
> + DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
> +};
> +
> +static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(klass);
> + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
> +
> + device_class_set_props(dc, vfio_user_pci_dev_properties);
> + dc->desc = "VFIO over socket PCI device assignment";
> + pdc->realize = vfio_user_pci_realize;
> +}
> +
> +static const TypeInfo vfio_user_pci_dev_info = {
> + .name = TYPE_VFIO_USER_PCI,
> + .parent = TYPE_VFIO_PCI_BASE,
> + .instance_size = sizeof(VFIOUserPCIDevice),
> + .class_init = vfio_user_pci_dev_class_init,
> + .instance_init = vfio_user_instance_init,
> + .instance_finalize = vfio_user_instance_finalize,
> +};
> +
> +static void register_vfio_user_dev_type(void)
> +{
> + type_register_static(&vfio_user_pci_dev_info);
> +}
> +
> + type_init(register_vfio_user_dev_type)
Use DEFINE_TYPES as you've already done above instead of type_init() here.
> diff --git a/hw/meson.build b/hw/meson.build
> index b91f761fe0..791ce21ab4 100644
> --- a/hw/meson.build
> +++ b/hw/meson.build
> @@ -39,6 +39,7 @@ subdir('uefi')
> subdir('ufs')
> subdir('usb')
> subdir('vfio')
> +subdir('vfio-user')
> subdir('virtio')
> subdir('vmapple')
> subdir('watchdog')
> diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
> new file mode 100644
> index 0000000000..f1fee70c85
> --- /dev/null
> +++ b/hw/vfio-user/meson.build
> @@ -0,0 +1,9 @@
> +vfio_user_ss = ss.source_set()
> +vfio_user_ss.add(files(
> + 'container.c',
> + 'pci.c',
> +))
> +
> +if get_option('vfio_user_client').enabled()
> + specific_ss.add_all(vfio_user_ss)
> +endif
> diff --git a/meson_options.txt b/meson_options.txt
> index a442be2995..97d3db44cd 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -109,6 +109,8 @@ option('multiprocess', type: 'feature', value: 'auto',
> description: 'Out of process device emulation support')
> option('relocatable', type : 'boolean', value : true,
> description: 'toggle relocatable install')
> +option('vfio_user_client', type: 'feature', value: 'disabled',
> + description: 'vfio-user client support')
> option('vfio_user_server', type: 'feature', value: 'disabled',
> description: 'vfio-user server support')
> option('dbus_display', type: 'feature', value: 'auto',
> diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
> index f09ef9604f..2c5673769a 100644
> --- a/scripts/meson-buildoptions.sh
> +++ b/scripts/meson-buildoptions.sh
> @@ -201,6 +201,8 @@ meson_options_help() {
> printf "%s\n" ' vdi vdi image format support'
> printf "%s\n" ' vduse-blk-export'
> printf "%s\n" ' VDUSE block export support'
> + printf "%s\n" ' vfio-user-client'
> + printf "%s\n" ' vfio-user client support'
> printf "%s\n" ' vfio-user-server'
> printf "%s\n" ' vfio-user server support'
> printf "%s\n" ' vhdx vhdx image format support'
> @@ -529,6 +531,8 @@ _meson_option_parse() {
> --disable-vdi) printf "%s" -Dvdi=disabled ;;
> --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
> --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
> + --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;;
> + --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;;
> --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
> --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
> --enable-vhdx) printf "%s" -Dvhdx=enabled ;;
ATB,
Mark.
On 6/10/25 13:42, Mark Cave-Ayland wrote:
> On 07/06/2025 01:10, John Levon wrote:
>
>> Introduce basic plumbing for vfio-user behind a new
>> --enable-vfio-user-client option.
>>
>> We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a
>> container type for the "IOMMU" type "vfio-iommu-user", and share some
>> common container code from hw/vfio/container.c.
>>
>> Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects,
>> sharing some common code from hw/vfio/pci.c.
>>
>> Originally-by: John Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Signed-off-by: John Levon <john.levon@nutanix.com>
>> ---
>> MAINTAINERS | 2 +
>> hw/vfio-user/container.h | 25 +++
>> include/hw/vfio/vfio-container-base.h | 1 +
>> hw/vfio-user/container.c | 212 ++++++++++++++++++++++++++
>> hw/vfio-user/pci.c | 155 +++++++++++++++++++
>> hw/meson.build | 1 +
>> hw/vfio-user/meson.build | 9 ++
>> meson_options.txt | 2 +
>> scripts/meson-buildoptions.sh | 4 +
>> 9 files changed, 411 insertions(+)
>> create mode 100644 hw/vfio-user/container.h
>> create mode 100644 hw/vfio-user/container.c
>> create mode 100644 hw/vfio-user/pci.c
>> create mode 100644 hw/vfio-user/meson.build
>>
>> diff --git a/MAINTAINERS b/MAINTAINERS
>> index dbc393989a..328bab8d19 100644
>> --- a/MAINTAINERS
>> +++ b/MAINTAINERS
>> @@ -4252,6 +4252,8 @@ M: John Levon <john.levon@nutanix.com>
>> M: Thanos Makatos <thanos.makatos@nutanix.com>
>> S: Supported
>> F: docs/devel/vfio-user.rst
>> +F: hw/vfio-user/*
>> +F: include/hw/vfio-user/*
>> F: subprojects/libvfio-user
>
> Question: how do you see the division between hw/vfio and hw/vfio-user? My initial feeling is that there is substantial sharing between the two, in which case I'd expect the files to be in hw/vfio as e.g. hw/vfio/container-user.c etc. instead of its own directory.
>
> Cédric, what are your thoughts here?
The vfio devices are tightly coupled to the bus they sit on.
My idea is to keep the initial implementation of the vfio-pci
device under hw/vfio, along with the common services for other
"complex" vfio devices like vfio-user-pci, which gives us:
hw/vfio/* 17323 lines
hw/vfio-user/* 2955 lines
vfio-ap and vfio-ccw are small but I might still move them under
s390x. vfio-platform will be removed in the next QEMU cycle.
Also, util/vfio-helpers.c, on which the nvme BlockDriver is built,
should be merged into the common services, i.e. under hw/vfio.
Thanks,
C.
>
>> EBPF:
>> diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
>> new file mode 100644
>> index 0000000000..3cd3303e68
>> --- /dev/null
>> +++ b/hw/vfio-user/container.h
>> @@ -0,0 +1,25 @@
>> +/*
>> + * vfio-user specific definitions.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2. See
>> + * the COPYING file in the top-level directory.
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#ifndef HW_VFIO_USER_CONTAINER_H
>> +#define HW_VFIO_USER_CONTAINER_H
>> +
>> +#include <inttypes.h>
>> +#include <stdbool.h>
>
> These shouldn't be included directly: you probably just want:
>
> #include "qemu/osdep.h"
>
> instead.
>
>> +#include "hw/vfio/vfio-container-base.h"
>> +
>> +/* MMU container sub-class for vfio-user. */
>> +typedef struct VFIOUserContainer {
>> + VFIOContainerBase bcontainer;
>> +} VFIOUserContainer;
>> +
>> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
>
> As per the documentation at https://qemu-project.gitlab.io/qemu/devel/style.html#qemu-object-model-declarations the parent object should always be named parent_obj and struct shouldn't have a typedef i.e.
>
> /* MMU container sub-class for vfio-user. */
> struct VFIOUserContainer {
> VFIOContainerBase parent_obj;
> };
>
> OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
>
>> +#endif /* HW_VFIO_USER_CONTAINER_H */
>> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
>> index 9d37f86115..28899ca0a6 100644
>> --- a/include/hw/vfio/vfio-container-base.h
>> +++ b/include/hw/vfio/vfio-container-base.h
>> @@ -109,6 +109,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
>> #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
>> #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
>> #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
>> +#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
>> OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
>> diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
>> new file mode 100644
>> index 0000000000..2892845b4f
>> --- /dev/null
>> +++ b/hw/vfio-user/container.c
>> @@ -0,0 +1,212 @@
>> +/*
>> + * Container for vfio-user IOMMU type: rather than communicating with the kernel
>> + * vfio driver, we communicate over a socket to a server using the vfio-user
>> + * protocol.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2. See
>> + * the COPYING file in the top-level directory.
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include <sys/ioctl.h>
>> +#include <linux/vfio.h>
>> +#include "qemu/osdep.h"
>> +
>> +#include "hw/vfio-user/container.h"
>> +#include "hw/vfio/vfio-cpr.h"
>> +#include "hw/vfio/vfio-device.h"
>> +#include "hw/vfio/vfio-listener.h"
>> +#include "qapi/error.h"
>> +#include "trace.h"
>> +
>> +static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
>> + hwaddr iova, ram_addr_t size,
>> + IOMMUTLBEntry *iotlb, bool unmap_all)
>> +{
>> + return -ENOTSUP;
>> +}
>> +
>> +static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
>> + ram_addr_t size, void *vaddr, bool readonly,
>> + MemoryRegion *mrp)
>> +{
>> + return -ENOTSUP;
>> +}
>> +
>> +static int
>> +vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
>> + bool start, Error **errp)
>> +{
>> + error_setg_errno(errp, ENOTSUP, "Not supported");
>> + return -ENOTSUP;
>> +}
>> +
>> +static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
>> + VFIOBitmap *vbmap, hwaddr iova,
>> + hwaddr size, Error **errp)
>> +{
>> + error_setg_errno(errp, ENOTSUP, "Not supported");
>> + return -ENOTSUP;
>> +}
>> +
>> +static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
>> +{
>> + error_setg_errno(errp, ENOTSUP, "Not supported");
>> + return -ENOTSUP;
>> +}
>> +
>> +static VFIOUserContainer *vfio_user_create_container(Error **errp)
>> +{
>> + VFIOUserContainer *container;
>> +
>> + container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
>> + return container;
>> +}
>> +
>> +/*
>> + * Try to mirror vfio_container_connect() as much as possible.
>> + */
>> +static VFIOUserContainer *
>> +vfio_user_container_connect(AddressSpace *as, Error **errp)
>> +{
>> + VFIOContainerBase *bcontainer;
>> + VFIOUserContainer *container;
>> + VFIOAddressSpace *space;
>> + VFIOIOMMUClass *vioc;
>> +
>> + space = vfio_address_space_get(as);
>> +
>> + container = vfio_user_create_container(errp);
>> + if (!container) {
>> + goto put_space_exit;
>> + }
>> +
>> + bcontainer = &container->bcontainer;
>
> References to the object hierarchy should always be done with the automatically generated QOM cast macros since they are easier to read, and also ensure type safety e.g.:
>
> bcontainer = VFIO_IOMMU(container);
>
>> + if (!vfio_cpr_register_container(bcontainer, errp)) {
>> + goto free_container_exit;
>> + }
>> +
>> + vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
>> + assert(vioc->setup);
>> +
>> + if (!vioc->setup(bcontainer, errp)) {
>> + goto unregister_container_exit;
>> + }
>> +
>> + vfio_address_space_insert(space, bcontainer);
>> +
>> + if (!vfio_listener_register(bcontainer, errp)) {
>> + goto listener_release_exit;
>> + }
>> +
>> + bcontainer->initialized = true;
>> +
>> + return container;
>> +
>> +listener_release_exit:
>> + vfio_listener_unregister(bcontainer);
>> + if (vioc->release) {
>> + vioc->release(bcontainer);
>> + }
>> +
>> +unregister_container_exit:
>> + vfio_cpr_unregister_container(bcontainer);
>> +
>> +free_container_exit:
>> + object_unref(container);
>> +
>> +put_space_exit:
>> + vfio_address_space_put(space);
>> +
>> + return NULL;
>> +}
>> +
>> +static void vfio_user_container_disconnect(VFIOUserContainer *container)
>> +{
>> + VFIOContainerBase *bcontainer = &container->bcontainer;
>
> Use a QOM cast here.
>
>> + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
>> +
>> + vfio_listener_unregister(bcontainer);
>> + if (vioc->release) {
>> + vioc->release(bcontainer);
>> + }
>> +
>> + VFIOAddressSpace *space = bcontainer->space;
>> +
>> + vfio_cpr_unregister_container(bcontainer);
>> + object_unref(container);
>> +
>> + vfio_address_space_put(space);
>> +}
>> +
>> +static bool vfio_user_device_get(VFIOUserContainer *container,
>> + VFIODevice *vbasedev, Error **errp)
>> +{
>> + struct vfio_device_info info = { 0 };
>> +
>> + vbasedev->fd = -1;
>> +
>> + vfio_device_prepare(vbasedev, &container->bcontainer, &info);
>
> Use a QOM cast here.
>
>> + return true;
>> +}
>> +
>> +/*
>> + * vfio_user_device_attach: attach a device to a new container.
>> + */
>> +static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
>> + AddressSpace *as, Error **errp)
>> +{
>> + VFIOUserContainer *container;
>> +
>> + container = vfio_user_container_connect(as, errp);
>> + if (container == NULL) {
>> + error_prepend(errp, "failed to connect proxy");
>> + return false;
>> + }
>> +
>> + return vfio_user_device_get(container, vbasedev, errp);
>> +}
>> +
>> +static void vfio_user_device_detach(VFIODevice *vbasedev)
>> +{
>> + VFIOUserContainer *container = container_of(vbasedev->bcontainer,
>> + VFIOUserContainer, bcontainer);
>> +
>> + vfio_device_unprepare(vbasedev);
>> +
>> + vfio_user_container_disconnect(container);
>> +}
>> +
>> +static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
>> +{
>> + /* ->needs_reset is always false for vfio-user. */
>> + return 0;
>> +}
>> +
>> +static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
>> +{
>> + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
>> +
>> + vioc->setup = vfio_user_setup;
>> + vioc->dma_map = vfio_user_dma_map;
>> + vioc->dma_unmap = vfio_user_dma_unmap;
>> + vioc->attach_device = vfio_user_device_attach;
>> + vioc->detach_device = vfio_user_device_detach;
>> + vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
>> + vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
>> + vioc->pci_hot_reset = vfio_user_pci_hot_reset;
>> +};
>> +
>> +static const TypeInfo types[] = {
>> + {
>> + .name = TYPE_VFIO_IOMMU_USER,
>> + .parent = TYPE_VFIO_IOMMU,
>> + .instance_size = sizeof(VFIOUserContainer),
>> + .class_init = vfio_iommu_user_class_init,
>> + },
>> +};
>> +
>> +DEFINE_TYPES(types)
>> diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
>> new file mode 100644
>> index 0000000000..74b0c61f9b
>> --- /dev/null
>> +++ b/hw/vfio-user/pci.c
>> @@ -0,0 +1,155 @@
>> +/*
>> + * vfio PCI device over a UNIX socket.
>> + *
>> + * Copyright © 2018, 2021 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + * SPDX-License-Identifier: GPL-2.0-or-later
>> + */
>> +
>> +#include <sys/ioctl.h>
>> +#include "qemu/osdep.h"
>> +
>> +#include "hw/qdev-properties.h"
>> +#include "hw/vfio/pci.h"
>> +
>> +#define TYPE_VFIO_USER_PCI "vfio-user-pci"
>> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
>> +
>> +struct VFIOUserPCIDevice {
>> + VFIOPCIDevice device;
>> + char *sock_name;
>> +};
>
> Again, as per the documentation link above, device should be called parent_obj, and there should be an empty line between parent_obj and the other members, i.e.
>
> struct VFIOUserPCIDevice {
> VFIOPCIDevice parent_obj;
>
> char *sock_name;
> }
>
> Note that by using QOM casts the name of the parent object member is not exposed to the remainder of the code.
>
>> +/*
>> + * Emulated devices don't use host hot reset
>> + */
>> +static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
>> +{
>> + vbasedev->needs_reset = false;
>> +}
>> +
>> +static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
>> +{
>> + VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice,
>> + device.vbasedev);
>
> Hmmm. Not for this series but this may suggest VFIODevice should be QOMified.
>
>> + return OBJECT(vdev);
>> +}
>> +
>> +static VFIODeviceOps vfio_user_pci_ops = {
>> + .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
>> + .vfio_eoi = vfio_pci_intx_eoi,
>> + .vfio_get_object = vfio_user_pci_get_object,
>> + /* No live migration support yet. */
>> + .vfio_save_config = NULL,
>> + .vfio_load_config = NULL,
>> +};
>> +
>> +static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
>> +{
>> + ERRP_GUARD();
>> + VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
>> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
>> + VFIODevice *vbasedev = &vdev->vbasedev;
>> + AddressSpace *as;
>> +
>> + /*
>> + * TODO: make option parser understand SocketAddress
>> + * and use that instead of having scalar options
>> + * for each socket type.
>> + */
>> + if (!udev->sock_name) {
>> + error_setg(errp, "No socket specified");
>> + error_append_hint(errp, "Use -device vfio-user-pci,socket=<name>\n");
>> + return;
>> + }
>> +
>> + vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
>> +
>> + /*
>> + * vfio-user devices are effectively mdevs (don't use a host iommu).
>> + */
>> + vbasedev->mdev = true;
>> +
>> + as = pci_device_iommu_address_space(pdev);
>> + if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
>> + vbasedev->name, vbasedev,
>> + as, errp)) {
>> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
>> + return;
>> + }
>> +}
>> +
>> +static void vfio_user_instance_init(Object *obj)
>> +{
>> + PCIDevice *pci_dev = PCI_DEVICE(obj);
>> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
>> + VFIODevice *vbasedev = &vdev->vbasedev;
>> +
>> + device_add_bootindex_property(obj, &vdev->bootindex,
>> + "bootindex", NULL,
>> + &pci_dev->qdev);
>> + vdev->host.domain = ~0U;
>> + vdev->host.bus = ~0U;
>> + vdev->host.slot = ~0U;
>> + vdev->host.function = ~0U;
>> +
>> + vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
>> + DEVICE(vdev), false);
>> +
>> + vdev->nv_gpudirect_clique = 0xFF;
>> +
>> + /*
>> + * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
>> + * line, therefore, no need to wait to realize like other devices.
>> + */
>> + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
>> +}
>> +
>> +static void vfio_user_instance_finalize(Object *obj)
>> +{
>> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
>> +
>> + vfio_pci_put_device(vdev);
>> +}
>> +
>> +static const Property vfio_user_pci_dev_properties[] = {
>> + DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
>> + vendor_id, PCI_ANY_ID),
>> + DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
>> + device_id, PCI_ANY_ID),
>> + DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
>> + sub_vendor_id, PCI_ANY_ID),
>> + DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
>> + sub_device_id, PCI_ANY_ID),
>> + DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
>> +};
>> +
>> +static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
>> +{
>> + DeviceClass *dc = DEVICE_CLASS(klass);
>> + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
>> +
>> + device_class_set_props(dc, vfio_user_pci_dev_properties);
>> + dc->desc = "VFIO over socket PCI device assignment";
>> + pdc->realize = vfio_user_pci_realize;
>> +}
>> +
>> +static const TypeInfo vfio_user_pci_dev_info = {
>> + .name = TYPE_VFIO_USER_PCI,
>> + .parent = TYPE_VFIO_PCI_BASE,
>> + .instance_size = sizeof(VFIOUserPCIDevice),
>> + .class_init = vfio_user_pci_dev_class_init,
>> + .instance_init = vfio_user_instance_init,
>> + .instance_finalize = vfio_user_instance_finalize,
>> +};
>> +
>> +static void register_vfio_user_dev_type(void)
>> +{
>> + type_register_static(&vfio_user_pci_dev_info);
>> +}
>> +
>> + type_init(register_vfio_user_dev_type)
>
> Use DEFINE_TYPES as you've already done above instead of type_init() here.
>
>> diff --git a/hw/meson.build b/hw/meson.build
>> index b91f761fe0..791ce21ab4 100644
>> --- a/hw/meson.build
>> +++ b/hw/meson.build
>> @@ -39,6 +39,7 @@ subdir('uefi')
>> subdir('ufs')
>> subdir('usb')
>> subdir('vfio')
>> +subdir('vfio-user')
>> subdir('virtio')
>> subdir('vmapple')
>> subdir('watchdog')
>> diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
>> new file mode 100644
>> index 0000000000..f1fee70c85
>> --- /dev/null
>> +++ b/hw/vfio-user/meson.build
>> @@ -0,0 +1,9 @@
>> +vfio_user_ss = ss.source_set()
>> +vfio_user_ss.add(files(
>> + 'container.c',
>> + 'pci.c',
>> +))
>> +
>> +if get_option('vfio_user_client').enabled()
>> + specific_ss.add_all(vfio_user_ss)
>> +endif
>> diff --git a/meson_options.txt b/meson_options.txt
>> index a442be2995..97d3db44cd 100644
>> --- a/meson_options.txt
>> +++ b/meson_options.txt
>> @@ -109,6 +109,8 @@ option('multiprocess', type: 'feature', value: 'auto',
>> description: 'Out of process device emulation support')
>> option('relocatable', type : 'boolean', value : true,
>> description: 'toggle relocatable install')
>> +option('vfio_user_client', type: 'feature', value: 'disabled',
>> + description: 'vfio-user client support')
>> option('vfio_user_server', type: 'feature', value: 'disabled',
>> description: 'vfio-user server support')
>> option('dbus_display', type: 'feature', value: 'auto',
>> diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
>> index f09ef9604f..2c5673769a 100644
>> --- a/scripts/meson-buildoptions.sh
>> +++ b/scripts/meson-buildoptions.sh
>> @@ -201,6 +201,8 @@ meson_options_help() {
>> printf "%s\n" ' vdi vdi image format support'
>> printf "%s\n" ' vduse-blk-export'
>> printf "%s\n" ' VDUSE block export support'
>> + printf "%s\n" ' vfio-user-client'
>> + printf "%s\n" ' vfio-user client support'
>> printf "%s\n" ' vfio-user-server'
>> printf "%s\n" ' vfio-user server support'
>> printf "%s\n" ' vhdx vhdx image format support'
>> @@ -529,6 +531,8 @@ _meson_option_parse() {
>> --disable-vdi) printf "%s" -Dvdi=disabled ;;
>> --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
>> --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
>> + --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;;
>> + --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;;
>> --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
>> --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
>> --enable-vhdx) printf "%s" -Dvhdx=enabled ;;
>
>
> ATB,
>
> Mark.
>
On Tue, Jun 10, 2025 at 12:42:35PM +0100, Mark Cave-Ayland wrote:
> Question: how do you see the division between hw/vfio and hw/vfio-user? My
> initial feeling is that there is substantial sharing between the two, in
> which case I'd expect the files to be in hw/vfio as e.g.
> hw/vfio/container-user.c etc. instead of its own directory.
That was also in the earlier patchsets! Cédric asked for hw/vfio-user - and I
think I actually prefer it myself. The amount we export from hw/vfio is actually
fairly minimal (now).
> > +#include "hw/vfio/vfio-container-base.h"
> > +
> > +/* MMU container sub-class for vfio-user. */
> > +typedef struct VFIOUserContainer {
> > + VFIOContainerBase bcontainer;
> > +} VFIOUserContainer;
> > +
> > +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
>
> As per the documentation at https://qemu-project.gitlab.io/qemu/devel/style.html#qemu-object-model-declarations
> the parent object should always be named parent_obj and struct shouldn't
> have a typedef i.e.
>
> /* MMU container sub-class for vfio-user. */
> struct VFIOUserContainer {
> VFIOContainerBase parent_obj;
> };
>
> OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
I don't think I want to diverge from VFIOContainer here, though, right?
> > +static VFIOUserContainer *
> > +vfio_user_container_connect(AddressSpace *as, Error **errp)
> > +{
> > + VFIOContainerBase *bcontainer;
> > + VFIOUserContainer *container;
> > + VFIOAddressSpace *space;
> > + VFIOIOMMUClass *vioc;
> > +
> > + space = vfio_address_space_get(as);
> > +
> > + container = vfio_user_create_container(errp);
> > + if (!container) {
> > + goto put_space_exit;
> > + }
> > +
> > + bcontainer = &container->bcontainer;
>
> References to the object hierarchy should always be done with the
> automatically generated QOM cast macros since they are easier to read, and
> also ensure type safety e.g.:
>
> bcontainer = VFIO_IOMMU(container);
Ditto. Sounds like a fine future cleanup that should be applied to all the code,
but for now, and to keep review simple, I'd prefer to stay "the same" as hw/vfio/.
> > +struct VFIOUserPCIDevice {
> > + VFIOPCIDevice device;
> > + char *sock_name;
> > +};
>
> Again as per the documentation link above, device should be called
> parent_obj plus there should be a empty line between parent_obj and the
> other members i.e.
>
> struct VFIOUserPCIDevice {
> VFIOPCIDevice parent_obj;
>
> char *sock_name;
> }
>
> Note that by using QOM casts the name of the parent object member is not
> exposed to the remainder of the code.
I can make this change, though, as there's no vfio equivalent, if Cédric thinks
I should too.
> > +static void register_vfio_user_dev_type(void)
> > +{
> > + type_register_static(&vfio_user_pci_dev_info);
> > +}
> > +
> > + type_init(register_vfio_user_dev_type)
>
> Use DEFINE_TYPES as you've already done above instead of type_init() here.
Again, same as hw/vfio/pci.c
regards
john
On 6/10/25 13:56, John Levon wrote:
> On Tue, Jun 10, 2025 at 12:42:35PM +0100, Mark Cave-Ayland wrote:
>
>> Question: how do you see the division between hw/vfio and hw/vfio-user? My
>> initial feeling is that there is substantial sharing between the two, in
>> which case I'd expect the files to be in hw/vfio as e.g.
>> hw/vfio/container-user.c etc. instead of its own directory.
>
> That was also in the earlier patchsets! Cédric asked for hw/vfio-user - and I
> think I actually prefer it myself. The amount we export from hw/vfio is actually
> fairly minimal (now).
Yes. It looks much better. The interfaces between the core VFIO framework
and the new vfio-user-pci device are clear. This is easier for maintenance
too.
Thanks,
C.
On 6/7/25 02:10, John Levon wrote:
> Introduce basic plumbing for vfio-user behind a new
> --enable-vfio-user-client option.
>
> We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a
> container type for the "IOMMU" type "vfio-iommu-user", and share some
> common container code from hw/vfio/container.c.
>
> Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects,
> sharing some common code from hw/vfio/pci.c.
>
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> MAINTAINERS | 2 +
> hw/vfio-user/container.h | 25 +++
> include/hw/vfio/vfio-container-base.h | 1 +
> hw/vfio-user/container.c | 212 ++++++++++++++++++++++++++
> hw/vfio-user/pci.c | 155 +++++++++++++++++++
> hw/meson.build | 1 +
> hw/vfio-user/meson.build | 9 ++
> meson_options.txt | 2 +
> scripts/meson-buildoptions.sh | 4 +
> 9 files changed, 411 insertions(+)
> create mode 100644 hw/vfio-user/container.h
> create mode 100644 hw/vfio-user/container.c
> create mode 100644 hw/vfio-user/pci.c
> create mode 100644 hw/vfio-user/meson.build
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index dbc393989a..328bab8d19 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4252,6 +4252,8 @@ M: John Levon <john.levon@nutanix.com>
> M: Thanos Makatos <thanos.makatos@nutanix.com>
> S: Supported
> F: docs/devel/vfio-user.rst
> +F: hw/vfio-user/*
> +F: include/hw/vfio-user/*
> F: subprojects/libvfio-user
>
> EBPF:
> diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
> new file mode 100644
> index 0000000000..3cd3303e68
> --- /dev/null
> +++ b/hw/vfio-user/container.h
> @@ -0,0 +1,25 @@
> +/*
> + * vfio-user specific definitions.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
Please drop the license boilerplate. It is redundant with the SPDX tag.
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef HW_VFIO_USER_CONTAINER_H
> +#define HW_VFIO_USER_CONTAINER_H
> +
> +#include <inttypes.h>
> +#include <stdbool.h>
> +
> +#include "hw/vfio/vfio-container-base.h"
> +
> +/* MMU container sub-class for vfio-user. */
> +typedef struct VFIOUserContainer {
> + VFIOContainerBase bcontainer;
> +} VFIOUserContainer;
> +
> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
> +
> +#endif /* HW_VFIO_USER_CONTAINER_H */
> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
> index 9d37f86115..28899ca0a6 100644
> --- a/include/hw/vfio/vfio-container-base.h
> +++ b/include/hw/vfio/vfio-container-base.h
> @@ -109,6 +109,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
> #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
> #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
> #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
> +#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
>
> OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
>
> diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
> new file mode 100644
> index 0000000000..2892845b4f
> --- /dev/null
> +++ b/hw/vfio-user/container.c
> @@ -0,0 +1,212 @@
> +/*
> + * Container for vfio-user IOMMU type: rather than communicating with the kernel
> + * vfio driver, we communicate over a socket to a server using the vfio-user
> + * protocol.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2. See
> + * the COPYING file in the top-level directory.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <sys/ioctl.h>
> +#include <linux/vfio.h>
> +#include "qemu/osdep.h"
> +
> +#include "hw/vfio-user/container.h"
> +#include "hw/vfio/vfio-cpr.h"
> +#include "hw/vfio/vfio-device.h"
> +#include "hw/vfio/vfio-listener.h"
> +#include "qapi/error.h"
> +#include "trace.h"
> +
> +static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
> + hwaddr iova, ram_addr_t size,
> + IOMMUTLBEntry *iotlb, bool unmap_all)
> +{
> + return -ENOTSUP;
> +}
> +
> +static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
> + ram_addr_t size, void *vaddr, bool readonly,
> + MemoryRegion *mrp)
> +{
> + return -ENOTSUP;
> +}
> +
> +static int
> +vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
> + bool start, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
> + VFIOBitmap *vbmap, hwaddr iova,
> + hwaddr size, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static VFIOUserContainer *vfio_user_create_container(Error **errp)
> +{
> + VFIOUserContainer *container;
> +
> + container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
> + return container;
> +}
> +
> +/*
> + * Try to mirror vfio_container_connect() as much as possible.
> + */
> +static VFIOUserContainer *
> +vfio_user_container_connect(AddressSpace *as, Error **errp)
> +{
> + VFIOContainerBase *bcontainer;
> + VFIOUserContainer *container;
> + VFIOAddressSpace *space;
> + VFIOIOMMUClass *vioc;
> +
> + space = vfio_address_space_get(as);
> +
> + container = vfio_user_create_container(errp);
> + if (!container) {
> + goto put_space_exit;
> + }
> +
> + bcontainer = &container->bcontainer;
> +
> + if (!vfio_cpr_register_container(bcontainer, errp)) {
> + goto free_container_exit;
> + }
> +
> + vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
> + assert(vioc->setup);
> +
> + if (!vioc->setup(bcontainer, errp)) {
> + goto unregister_container_exit;
> + }
> +
> + vfio_address_space_insert(space, bcontainer);
> +
> + if (!vfio_listener_register(bcontainer, errp)) {
> + goto listener_release_exit;
> + }
> +
> + bcontainer->initialized = true;
> +
> + return container;
> +
> +listener_release_exit:
> + vfio_listener_unregister(bcontainer);
> + if (vioc->release) {
> + vioc->release(bcontainer);
> + }
> +
> +unregister_container_exit:
> + vfio_cpr_unregister_container(bcontainer);
> +
> +free_container_exit:
> + object_unref(container);
> +
> +put_space_exit:
> + vfio_address_space_put(space);
> +
> + return NULL;
> +}
> +
> +static void vfio_user_container_disconnect(VFIOUserContainer *container)
> +{
> + VFIOContainerBase *bcontainer = &container->bcontainer;
> + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
> +
> + vfio_listener_unregister(bcontainer);
> + if (vioc->release) {
> + vioc->release(bcontainer);
> + }
> +
> + VFIOAddressSpace *space = bcontainer->space;
> +
> + vfio_cpr_unregister_container(bcontainer);
> + object_unref(container);
> +
> + vfio_address_space_put(space);
> +}
> +
> +static bool vfio_user_device_get(VFIOUserContainer *container,
> + VFIODevice *vbasedev, Error **errp)
> +{
> + struct vfio_device_info info = { 0 };
> +
> + vbasedev->fd = -1;
> +
> + vfio_device_prepare(vbasedev, &container->bcontainer, &info);
> +
> + return true;
> +}
> +
> +/*
> + * vfio_user_device_attach: attach a device to a new container.
> + */
> +static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
> + AddressSpace *as, Error **errp)
> +{
> + VFIOUserContainer *container;
> +
> + container = vfio_user_container_connect(as, errp);
> + if (container == NULL) {
> + error_prepend(errp, "failed to connect proxy");
> + return false;
> + }
> +
> + return vfio_user_device_get(container, vbasedev, errp);
> +}
> +
> +static void vfio_user_device_detach(VFIODevice *vbasedev)
> +{
> + VFIOUserContainer *container = container_of(vbasedev->bcontainer,
> + VFIOUserContainer, bcontainer);
> +
> + vfio_device_unprepare(vbasedev);
> +
> + vfio_user_container_disconnect(container);
> +}
> +
> +static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
> +{
> + /* ->needs_reset is always false for vfio-user. */
> + return 0;
> +}
> +
> +static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
> +{
> + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
> +
> + vioc->setup = vfio_user_setup;
> + vioc->dma_map = vfio_user_dma_map;
> + vioc->dma_unmap = vfio_user_dma_unmap;
> + vioc->attach_device = vfio_user_device_attach;
> + vioc->detach_device = vfio_user_device_detach;
> + vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
> + vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
> + vioc->pci_hot_reset = vfio_user_pci_hot_reset;
> +};
> +
> +static const TypeInfo types[] = {
> + {
> + .name = TYPE_VFIO_IOMMU_USER,
> + .parent = TYPE_VFIO_IOMMU,
> + .instance_size = sizeof(VFIOUserContainer),
> + .class_init = vfio_iommu_user_class_init,
> + },
> +};
> +
> +DEFINE_TYPES(types)
> diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
> new file mode 100644
> index 0000000000..74b0c61f9b
> --- /dev/null
> +++ b/hw/vfio-user/pci.c
> @@ -0,0 +1,155 @@
> +/*
> + * vfio PCI device over a UNIX socket.
> + *
> + * Copyright © 2018, 2021 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <sys/ioctl.h>
> +#include "qemu/osdep.h"
> +
> +#include "hw/qdev-properties.h"
> +#include "hw/vfio/pci.h"
> +
> +#define TYPE_VFIO_USER_PCI "vfio-user-pci"
> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
> +
> +struct VFIOUserPCIDevice {
> + VFIOPCIDevice device;
> + char *sock_name;
> +};
> +
> +/*
> + * Emulated devices don't use host hot reset
> + */
> +static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
> +{
> + vbasedev->needs_reset = false;
> +}
> +
> +static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
> +{
> + VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice,
> + device.vbasedev);
> +
> + return OBJECT(vdev);
> +}
> +
> +static VFIODeviceOps vfio_user_pci_ops = {
> + .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
> + .vfio_eoi = vfio_pci_intx_eoi,
> + .vfio_get_object = vfio_user_pci_get_object,
> + /* No live migration support yet. */
> + .vfio_save_config = NULL,
> + .vfio_load_config = NULL,
> +};
> +
> +static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
> +{
> + ERRP_GUARD();
> + VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + AddressSpace *as;
> +
> + /*
> + * TODO: make option parser understand SocketAddress
> + * and use that instead of having scalar options
> + * for each socket type.
> + */
> + if (!udev->sock_name) {
> + error_setg(errp, "No socket specified");
> + error_append_hint(errp, "Use -device vfio-user-pci,socket=<name>\n");
> + return;
> + }
> +
> + vbasedev->name = g_strdup_printf("VFIO user <%s>", udev->sock_name);
> +
> + /*
> + * vfio-user devices are effectively mdevs (don't use a host iommu).
> + */
> + vbasedev->mdev = true;
> +
> + as = pci_device_iommu_address_space(pdev);
> + if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
> + vbasedev->name, vbasedev,
> + as, errp)) {
> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
> + return;
> + }
> +}
> +
> +static void vfio_user_instance_init(Object *obj)
> +{
> + PCIDevice *pci_dev = PCI_DEVICE(obj);
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> +
> + device_add_bootindex_property(obj, &vdev->bootindex,
> + "bootindex", NULL,
> + &pci_dev->qdev);
> + vdev->host.domain = ~0U;
> + vdev->host.bus = ~0U;
> + vdev->host.slot = ~0U;
> + vdev->host.function = ~0U;
> +
> + vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
> + DEVICE(vdev), false);
> +
> + vdev->nv_gpudirect_clique = 0xFF;
> +
> + /*
> + * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
> + * line, therefore, no need to wait to realize like other devices.
> + */
> + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
> +}
> +
> +static void vfio_user_instance_finalize(Object *obj)
> +{
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> +
> + vfio_pci_put_device(vdev);
> +}
> +
> +static const Property vfio_user_pci_dev_properties[] = {
> + DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
> + vendor_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
> + device_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
> + sub_vendor_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
> + sub_device_id, PCI_ANY_ID),
> + DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
> +};
> +
> +static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(klass);
> + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
> +
> + device_class_set_props(dc, vfio_user_pci_dev_properties);
> + dc->desc = "VFIO over socket PCI device assignment";
> + pdc->realize = vfio_user_pci_realize;
> +}
> +
> +static const TypeInfo vfio_user_pci_dev_info = {
> + .name = TYPE_VFIO_USER_PCI,
> + .parent = TYPE_VFIO_PCI_BASE,
> + .instance_size = sizeof(VFIOUserPCIDevice),
> + .class_init = vfio_user_pci_dev_class_init,
> + .instance_init = vfio_user_instance_init,
> + .instance_finalize = vfio_user_instance_finalize,
> +};
> +
> +static void register_vfio_user_dev_type(void)
> +{
> + type_register_static(&vfio_user_pci_dev_info);
> +}
> +
> + type_init(register_vfio_user_dev_type)
> diff --git a/hw/meson.build b/hw/meson.build
> index b91f761fe0..791ce21ab4 100644
> --- a/hw/meson.build
> +++ b/hw/meson.build
> @@ -39,6 +39,7 @@ subdir('uefi')
> subdir('ufs')
> subdir('usb')
> subdir('vfio')
> +subdir('vfio-user')
> subdir('virtio')
> subdir('vmapple')
> subdir('watchdog')
> diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
> new file mode 100644
> index 0000000000..f1fee70c85
> --- /dev/null
> +++ b/hw/vfio-user/meson.build
> @@ -0,0 +1,9 @@
> +vfio_user_ss = ss.source_set()
> +vfio_user_ss.add(files(
> + 'container.c',
> + 'pci.c',
> +))
> +
> +if get_option('vfio_user_client').enabled()
> + specific_ss.add_all(vfio_user_ss)
> +endif
> diff --git a/meson_options.txt b/meson_options.txt
> index a442be2995..97d3db44cd 100644
> --- a/meson_options.txt
> +++ b/meson_options.txt
> @@ -109,6 +109,8 @@ option('multiprocess', type: 'feature', value: 'auto',
> description: 'Out of process device emulation support')
> option('relocatable', type : 'boolean', value : true,
> description: 'toggle relocatable install')
> +option('vfio_user_client', type: 'feature', value: 'disabled',
> + description: 'vfio-user client support')
> option('vfio_user_server', type: 'feature', value: 'disabled',
> description: 'vfio-user server support')
> option('dbus_display', type: 'feature', value: 'auto',
> diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
> index f09ef9604f..2c5673769a 100644
> --- a/scripts/meson-buildoptions.sh
> +++ b/scripts/meson-buildoptions.sh
> @@ -201,6 +201,8 @@ meson_options_help() {
> printf "%s\n" ' vdi vdi image format support'
> printf "%s\n" ' vduse-blk-export'
> printf "%s\n" ' VDUSE block export support'
> + printf "%s\n" ' vfio-user-client'
> + printf "%s\n" ' vfio-user client support'
> printf "%s\n" ' vfio-user-server'
> printf "%s\n" ' vfio-user server support'
> printf "%s\n" ' vhdx vhdx image format support'
> @@ -529,6 +531,8 @@ _meson_option_parse() {
> --disable-vdi) printf "%s" -Dvdi=disabled ;;
> --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
> --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
> + --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;;
> + --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;;
> --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
> --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
> --enable-vhdx) printf "%s" -Dvhdx=enabled ;;
can't we simply have a CONFIG option and select the device on platforms
supporting it ?
Thanks,
C.
On Tue, Jun 10, 2025 at 11:57:00AM +0200, Cédric Le Goater wrote:
> > @@ -529,6 +531,8 @@ _meson_option_parse() {
> > --disable-vdi) printf "%s" -Dvdi=disabled ;;
> > --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
> > --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
> > + --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;;
> > + --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;;
> > --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
> > --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
> > --enable-vhdx) printf "%s" -Dvhdx=enabled ;;
>
> can't we simply have a CONFIG option and select the device on platforms
> supporting it ?
You mean always build vfio-user client rather than optionally? Why would it be
different from other optional components?
AFAIK all platforms (at least in theory) would support it.
regards
john
On 6/10/25 18:52, John Levon wrote:
> On Tue, Jun 10, 2025 at 11:57:00AM +0200, Cédric Le Goater wrote:
>
>>> @@ -529,6 +531,8 @@ _meson_option_parse() {
>>> --disable-vdi) printf "%s" -Dvdi=disabled ;;
>>> --enable-vduse-blk-export) printf "%s" -Dvduse_blk_export=enabled ;;
>>> --disable-vduse-blk-export) printf "%s" -Dvduse_blk_export=disabled ;;
>>> + --enable-vfio-user-client) printf "%s" -Dvfio_user_client=enabled ;;
>>> + --disable-vfio-user-client) printf "%s" -Dvfio_user_client=disabled ;;
>>> --enable-vfio-user-server) printf "%s" -Dvfio_user_server=enabled ;;
>>> --disable-vfio-user-server) printf "%s" -Dvfio_user_server=disabled ;;
>>> --enable-vhdx) printf "%s" -Dvhdx=enabled ;;
>>
>> can't we simply have a CONFIG option and select the device on platforms
>> supporting it ?
>
> You mean always build vfio-user client rather than optionally? Why would it be
> different from other optional components?
why would it be optional ? I don't see any dependency for vfio-user-client.
vfio-user-server depends on libvfio-user [1]. Not vfio-user-client.
Should it ?
Does this mean we are abandoning libvfio-user ? Sorry I am not familiar with
this framework.
> AFAIK all platforms (at least in theory) would support it.
So what would be the reason for not compiling it ? It is not different
from VFIO AFAICT.
Thanks,
C.
[1] https://gitlab.com/qemu-project/libvfio-user
On Wed, Jun 11, 2025 at 09:46:15AM +0200, Cédric Le Goater wrote:
> > > can't we simply have a CONFIG option and select the device on platforms
> > > supporting it ?
> >
> > You mean always build vfio-user client rather than optionally? Why would it be
> > different from other optional components?
>
> why would it be optional ? I don't see any dependency for vfio-user-client.
> vfio-user-server depends on libvfio-user [1]. Not vfio-user-client.
> Should it ?
libvfio-user is for the server side, you are correct that the client does not
depend on it at all. (It's still actively in use.)
> > AFAIK all platforms (at least in theory) would support it.
>
> So what would be the reason for not compiling it ? It is not different
> from VFIO AFAICT.
OK, thanks. I simply didn't know qemu policy on building things in by default.
Of course I would be very happy to see it built into every qemu :)
regards
john
© 2016 - 2025 Red Hat, Inc.