Introduce basic plumbing for vfio-user with CONFIG_VFIO_USER.
We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a
container type for the "IOMMU" type "vfio-iommu-user", and share some
common container code from hw/vfio/container.c.
Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects,
sharing some common code from hw/vfio/pci.c.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
MAINTAINERS | 8 +
hw/vfio-user/container.h | 21 +++
include/hw/vfio/vfio-container-base.h | 1 +
hw/vfio-user/container.c | 208 ++++++++++++++++++++++++++
hw/vfio-user/pci.c | 185 +++++++++++++++++++++++
hw/Kconfig | 1 +
hw/meson.build | 1 +
hw/vfio-user/Kconfig | 7 +
hw/vfio-user/meson.build | 9 ++
9 files changed, 441 insertions(+)
create mode 100644 hw/vfio-user/container.h
create mode 100644 hw/vfio-user/container.c
create mode 100644 hw/vfio-user/pci.c
create mode 100644 hw/vfio-user/Kconfig
create mode 100644 hw/vfio-user/meson.build
diff --git a/MAINTAINERS b/MAINTAINERS
index 27f4fe3f25..2369391004 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4253,6 +4253,14 @@ F: hw/remote/iommu.c
F: include/hw/remote/iommu.h
F: tests/functional/test_multiprocess.py
+VFIO-USER:
+M: John Levon <john.levon@nutanix.com>
+M: Thanos Makatos <thanos.makatos@nutanix.com>
+S: Supported
+F: hw/vfio-user/*
+F: include/hw/vfio-user/*
+F: subprojects/libvfio-user
+
EBPF:
M: Jason Wang <jasowang@redhat.com>
R: Andrew Melnychenko <andrew@daynix.com>
diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
new file mode 100644
index 0000000000..e4a46d2c1b
--- /dev/null
+++ b/hw/vfio-user/container.h
@@ -0,0 +1,21 @@
+/*
+ * vfio-user specific definitions.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HW_VFIO_USER_CONTAINER_H
+#define HW_VFIO_USER_CONTAINER_H
+
+#include "qemu/osdep.h"
+
+#include "hw/vfio/vfio-container-base.h"
+
+/*
+ * IOMMU container sub-class for vfio-user (the comment previously read
+ * "MMU"; presumably a typo, as the QOM type is TYPE_VFIO_IOMMU_USER).
+ * It carries no state beyond the embedded base container.
+ */
+typedef struct VFIOUserContainer {
+ VFIOContainerBase bcontainer;
+} VFIOUserContainer;
+
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
+
+#endif /* HW_VFIO_USER_CONTAINER_H */
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index f0232654ee..3cd86ec59e 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -109,6 +109,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
#define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
#define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
#define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
+#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
new file mode 100644
index 0000000000..2367332177
--- /dev/null
+++ b/hw/vfio-user/container.c
@@ -0,0 +1,208 @@
+/*
+ * Container for vfio-user IOMMU type: rather than communicating with the kernel
+ * vfio driver, we communicate over a socket to a server using the vfio-user
+ * protocol.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include "qemu/osdep.h"
+
+#include "hw/vfio-user/container.h"
+#include "hw/vfio/vfio-cpr.h"
+#include "hw/vfio/vfio-device.h"
+#include "hw/vfio/vfio-listener.h"
+#include "qapi/error.h"
+
+/*
+ * DMA unmap hook of the vfio-user container.
+ *
+ * Placeholder implementation: the arguments are ignored and the request
+ * is always refused.
+ *
+ * Returns: -ENOTSUP.
+ */
+static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
+                               hwaddr iova, ram_addr_t size,
+                               IOMMUTLBEntry *iotlb, bool unmap_all)
+{
+    const int ret = -ENOTSUP;
+
+    return ret;
+}
+
+/*
+ * DMA map hook of the vfio-user container.
+ *
+ * Placeholder implementation: the arguments are ignored and the request
+ * is always refused.
+ *
+ * Returns: -ENOTSUP.
+ */
+static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
+                             ram_addr_t size, void *vaddr, bool readonly,
+                             MemoryRegion *mrp)
+{
+    const int ret = -ENOTSUP;
+
+    return ret;
+}
+
+/*
+ * Dirty page tracking start/stop hook of the vfio-user container.
+ *
+ * Placeholder implementation: reports "Not supported" through @errp.
+ *
+ * Returns: -ENOTSUP.
+ */
+static int
+vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
+                                  bool start, Error **errp)
+{
+    const int err = ENOTSUP;
+
+    error_setg_errno(errp, err, "Not supported");
+    return -err;
+}
+
+/*
+ * Dirty bitmap query hook of the vfio-user container.
+ *
+ * Placeholder implementation: @vbmap is left untouched and "Not supported"
+ * is reported through @errp.
+ *
+ * Returns: -ENOTSUP.
+ */
+static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
+                                        VFIOBitmap *vbmap, hwaddr iova,
+                                        hwaddr size, Error **errp)
+{
+    const int err = ENOTSUP;
+
+    error_setg_errno(errp, err, "Not supported");
+    return -err;
+}
+
+/*
+ * Container setup hook (installed as VFIOIOMMUClass::setup).
+ *
+ * Not implemented yet: always fails and reports ENOTSUP through @errp.
+ *
+ * Returns: false, with @errp set.  The return type is bool, so a negative
+ * errno must not be returned here: it would convert to true and the caller
+ * would carry on as if setup had succeeded while @errp is already set.
+ */
+static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
+{
+    error_setg_errno(errp, ENOTSUP, "Not supported");
+    return false;
+}
+
+/*
+ * Instantiate a new TYPE_VFIO_IOMMU_USER object and return it as a
+ * VFIOUserContainer.  @errp is currently unused.
+ */
+static VFIOUserContainer *vfio_user_create_container(Error **errp)
+{
+    return VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
+}
+
+/*
+ * Create a vfio-user container and connect it to the address space @as.
+ *
+ * Try to mirror vfio_container_connect() as much as possible.
+ *
+ * Returns the new container once its listener is registered and it has
+ * been marked initialized, or NULL after undoing the steps completed so
+ * far.
+ */
+static VFIOUserContainer *
+vfio_user_container_connect(AddressSpace *as, Error **errp)
+{
+ VFIOContainerBase *bcontainer;
+ VFIOUserContainer *container;
+ VFIOAddressSpace *space;
+ VFIOIOMMUClass *vioc;
+
+ /* Handed back with vfio_address_space_put() on every failure path. */
+ space = vfio_address_space_get(as);
+
+ container = vfio_user_create_container(errp);
+ if (!container) {
+ goto put_space_exit;
+ }
+
+ bcontainer = &container->bcontainer;
+
+ if (!vfio_cpr_register_container(bcontainer, errp)) {
+ goto free_container_exit;
+ }
+
+ vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+ assert(vioc->setup);
+
+ if (!vioc->setup(bcontainer, errp)) {
+ goto unregister_container_exit;
+ }
+
+ vfio_address_space_insert(space, bcontainer);
+
+ if (!vfio_listener_register(bcontainer, errp)) {
+ goto listener_release_exit;
+ }
+
+ bcontainer->initialized = true;
+
+ return container;
+
+ /* Error unwinding: each label undoes one step, then falls through. */
+listener_release_exit:
+ vfio_listener_unregister(bcontainer);
+ if (vioc->release) {
+ vioc->release(bcontainer);
+ }
+
+unregister_container_exit:
+ vfio_cpr_unregister_container(bcontainer);
+
+free_container_exit:
+ object_unref(container);
+
+put_space_exit:
+ vfio_address_space_put(space);
+
+ return NULL;
+}
+
+/*
+ * Tear down a container set up by vfio_user_container_connect():
+ * unregister its listener, call the class release hook if there is one,
+ * unregister it from CPR, drop the object reference and finally put the
+ * address space it was attached to.
+ */
+static void vfio_user_container_disconnect(VFIOUserContainer *container)
+{
+    VFIOContainerBase *bcontainer = &container->bcontainer;
+    VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
+    VFIOAddressSpace *space;
+
+    vfio_listener_unregister(bcontainer);
+    if (vioc->release) {
+        vioc->release(bcontainer);
+    }
+
+    /* Saved now because @bcontainer is not touched after the unref below. */
+    space = bcontainer->space;
+
+    vfio_cpr_unregister_container(bcontainer);
+    object_unref(container);
+
+    vfio_address_space_put(space);
+}
+
+/*
+ * Bind @vbasedev to @container.
+ *
+ * The device fd is set to -1 and the device is prepared against the
+ * container with a zero-filled vfio_device_info.  @errp is unused.
+ *
+ * Returns: true (this step cannot currently fail).
+ */
+static bool vfio_user_device_get(VFIOUserContainer *container,
+                                 VFIODevice *vbasedev, Error **errp)
+{
+    VFIOContainerBase *bcontainer = &container->bcontainer;
+    struct vfio_device_info info = { 0 };
+
+    vbasedev->fd = -1;
+    vfio_device_prepare(vbasedev, bcontainer, &info);
+
+    return true;
+}
+
+/*
+ * vfio_user_device_attach: attach a device to a new container.
+ *
+ * A fresh container is connected to @as and @vbasedev is then bound to
+ * it.  @name is unused.
+ *
+ * Returns: true on success; false with @errp set if the container could
+ * not be connected.
+ */
+static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
+                                    AddressSpace *as, Error **errp)
+{
+    /* Required because error_prepend() below dereferences @errp. */
+    ERRP_GUARD();
+    VFIOUserContainer *container;
+
+    container = vfio_user_container_connect(as, errp);
+    if (container == NULL) {
+        /* Keep the ": " so the prefix does not run into the cause. */
+        error_prepend(errp, "failed to connect proxy: ");
+        return false;
+    }
+
+    return vfio_user_device_get(container, vbasedev, errp);
+}
+
+/*
+ * Detach @vbasedev: unprepare the device, then disconnect the container
+ * it was attached to.
+ */
+static void vfio_user_device_detach(VFIODevice *vbasedev)
+{
+    /*
+     * Resolved before vfio_device_unprepare(), which presumably resets
+     * vbasedev->bcontainer (TODO confirm).
+     */
+    VFIOUserContainer *owner =
+        container_of(vbasedev->bcontainer, VFIOUserContainer, bcontainer);
+
+    vfio_device_unprepare(vbasedev);
+    vfio_user_container_disconnect(owner);
+}
+
+/*
+ * PCI hot reset hook: nothing to do, always reports success (0).
+ */
+static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
+{
+    /* ->needs_reset is always false for vfio-user. */
+    const int ret = 0;
+
+    return ret;
+}
+
+/*
+ * Class initializer for TYPE_VFIO_IOMMU_USER: installs the vfio-user
+ * implementations of the VFIOIOMMUClass hooks.
+ */
+static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
+{
+    VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
+
+    vioc->setup = vfio_user_setup;
+    vioc->dma_map = vfio_user_dma_map;
+    vioc->dma_unmap = vfio_user_dma_unmap;
+    vioc->attach_device = vfio_user_device_attach;
+    vioc->detach_device = vfio_user_device_detach;
+    vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
+    vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
+    vioc->pci_hot_reset = vfio_user_pci_hot_reset;
+}
+
+/* QOM registration record for the vfio-user IOMMU container type. */
+static const TypeInfo types[] = {
+    {
+        .parent = TYPE_VFIO_IOMMU,
+        .name = TYPE_VFIO_IOMMU_USER,
+        .class_init = vfio_iommu_user_class_init,
+        .instance_size = sizeof(VFIOUserContainer),
+    },
+};
+
+DEFINE_TYPES(types)
diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
new file mode 100644
index 0000000000..86d7055747
--- /dev/null
+++ b/hw/vfio-user/pci.c
@@ -0,0 +1,185 @@
+/*
+ * vfio PCI device over a UNIX socket.
+ *
+ * Copyright © 2018, 2021 Oracle and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include <sys/ioctl.h>
+#include "qemu/osdep.h"
+#include "qapi-visit-sockets.h"
+
+#include "hw/qdev-properties.h"
+#include "hw/vfio/pci.h"
+
+/* QOM type name of the vfio-user PCI device. */
+#define TYPE_VFIO_USER_PCI "vfio-user-pci"
+OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
+
+/* A vfio PCI device that is addressed through a socket. */
+struct VFIOUserPCIDevice {
+ VFIOPCIDevice device;
+ /* Set through the "socket" property; only UNIX sockets are accepted. */
+ SocketAddress *socket;
+};
+
+/*
+ * Emulated devices don't use host hot reset, so the device is never
+ * flagged as needing one.
+ */
+static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
+{
+    const bool needs_reset = false;
+
+    vbasedev->needs_reset = needs_reset;
+}
+
+/*
+ * Return the QOM object of the VFIOUserPCIDevice that embeds @vbasedev.
+ */
+static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
+{
+    return OBJECT(container_of(vbasedev, VFIOUserPCIDevice, device.vbasedev));
+}
+
+/* Device callbacks handed to vfio_device_init() for vfio-user PCI devices. */
+static VFIODeviceOps vfio_user_pci_ops = {
+    .vfio_get_object = vfio_user_pci_get_object,
+    .vfio_eoi = vfio_pci_intx_eoi,
+    .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
+    /* No live migration support yet. */
+    .vfio_load_config = NULL,
+    .vfio_save_config = NULL,
+};
+
+/*
+ * Realize a vfio-user PCI device.
+ *
+ * The "socket" property must have been set.  The device is named after
+ * the socket path and attached to a TYPE_VFIO_IOMMU_USER container for
+ * the device's PCI IOMMU address space.  Failures are reported through
+ * @errp.
+ */
+static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
+{
+    ERRP_GUARD();
+    VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
+    VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
+    VFIODevice *vbasedev = &vdev->vbasedev;
+    const char *sock_name;
+    AddressSpace *as;
+
+    if (!udev->socket) {
+        error_setg(errp, "No socket specified");
+        /* The example must stay a well-formed, quoted JSON argument. */
+        error_append_hint(errp, "e.g. -device '{"
+            "\"driver\":\"vfio-user-pci\", "
+            "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", "
+            "\"type\": \"unix\"}"
+            "}'\n");
+        return;
+    }
+
+    /* The "socket" setter only ever stores UNIX socket addresses. */
+    sock_name = udev->socket->u.q_unix.path;
+
+    vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name);
+
+    /*
+     * vfio-user devices are effectively mdevs (don't use a host iommu).
+     */
+    vbasedev->mdev = true;
+
+    as = pci_device_iommu_address_space(pdev);
+    if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
+                                          vbasedev->name, vbasedev,
+                                          as, errp)) {
+        error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
+        return;
+    }
+}
+
+/*
+ * Instance initializer: adds the "bootindex" property, fills in the
+ * host address and NVIDIA clique defaults, initializes the embedded
+ * VFIODevice with vfio_user_pci_ops and marks the device as PCI Express
+ * capable.
+ */
+static void vfio_user_instance_init(Object *obj)
+{
+ PCIDevice *pci_dev = PCI_DEVICE(obj);
+ VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
+ VFIODevice *vbasedev = &vdev->vbasedev;
+
+ device_add_bootindex_property(obj, &vdev->bootindex,
+ "bootindex", NULL,
+ &pci_dev->qdev);
+ /* All-ones host address: presumably means "no host device" - confirm. */
+ vdev->host.domain = ~0U;
+ vdev->host.bus = ~0U;
+ vdev->host.slot = ~0U;
+ vdev->host.function = ~0U;
+
+ vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
+ DEVICE(vdev), false);
+
+ /* NOTE(review): 0xFF looks like the "unset" value - confirm. */
+ vdev->nv_gpudirect_clique = 0xFF;
+
+ /*
+ * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
+ * line, therefore, no need to wait to realize like other devices.
+ */
+ pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
+}
+
+/*
+ * Instance finalizer: hands the underlying VFIOPCIDevice to
+ * vfio_pci_put_device().
+ */
+static void vfio_user_instance_finalize(Object *obj)
+{
+    vfio_pci_put_device(VFIO_PCI_BASE(obj));
+}
+
+/*
+ * "x-"-prefixed properties backing the vendor, device and subsystem ID
+ * fields of VFIOPCIDevice; each defaults to PCI_ANY_ID.
+ */
+static const Property vfio_user_pci_dev_properties[] = {
+ DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
+ vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
+ device_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
+ sub_vendor_id, PCI_ANY_ID),
+ DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
+ sub_device_id, PCI_ANY_ID),
+};
+
+/*
+ * Setter for the "socket" property.
+ *
+ * Any previously stored address is freed, then a SocketAddress is read
+ * from visitor @v.  Only UNIX socket addresses are kept: for any other
+ * type an error is set, the parsed address is freed and the property is
+ * left NULL.
+ */
+static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name,
+                                     void *opaque, Error **errp)
+{
+    VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj);
+
+    qapi_free_SocketAddress(udev->socket);
+    udev->socket = NULL;
+
+    if (!visit_type_SocketAddress(v, name, &udev->socket, errp)) {
+        return;
+    }
+
+    if (udev->socket->type == SOCKET_ADDRESS_TYPE_UNIX) {
+        return;
+    }
+
+    error_setg(errp, "Unsupported socket type %s",
+               SocketAddressType_str(udev->socket->type));
+    qapi_free_SocketAddress(udev->socket);
+    udev->socket = NULL;
+}
+
+/*
+ * Class initializer for TYPE_VFIO_USER_PCI: sets the description and
+ * realize hook, and registers the static properties plus the write-only
+ * "socket" property (it has a setter but no getter).
+ */
+static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
+{
+    PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
+    DeviceClass *dc = DEVICE_CLASS(klass);
+
+    pdc->realize = vfio_user_pci_realize;
+    dc->desc = "VFIO over socket PCI device assignment";
+
+    device_class_set_props(dc, vfio_user_pci_dev_properties);
+
+    object_class_property_add(klass, "socket", "SocketAddress", NULL,
+                              vfio_user_pci_set_socket, NULL, NULL);
+    object_class_property_set_description(klass, "socket",
+                                          "SocketAddress (UNIX sockets only)");
+}
+
+/* QOM registration record for the vfio-user PCI device type. */
+static const TypeInfo vfio_user_pci_dev_info = {
+    .parent = TYPE_VFIO_PCI_BASE,
+    .name = TYPE_VFIO_USER_PCI,
+    .instance_size = sizeof(VFIOUserPCIDevice),
+    .instance_init = vfio_user_instance_init,
+    .instance_finalize = vfio_user_instance_finalize,
+    .class_init = vfio_user_pci_dev_class_init,
+};
+
+/* Register the vfio-user PCI device type with QOM. */
+static void register_vfio_user_dev_type(void)
+{
+    type_register_static(&vfio_user_pci_dev_info);
+}
+
+type_init(register_vfio_user_dev_type)
diff --git a/hw/Kconfig b/hw/Kconfig
index 9a86a6a28a..9e6c789ae7 100644
--- a/hw/Kconfig
+++ b/hw/Kconfig
@@ -42,6 +42,7 @@ source ufs/Kconfig
source usb/Kconfig
source virtio/Kconfig
source vfio/Kconfig
+source vfio-user/Kconfig
source vmapple/Kconfig
source xen/Kconfig
source watchdog/Kconfig
diff --git a/hw/meson.build b/hw/meson.build
index b91f761fe0..791ce21ab4 100644
--- a/hw/meson.build
+++ b/hw/meson.build
@@ -39,6 +39,7 @@ subdir('uefi')
subdir('ufs')
subdir('usb')
subdir('vfio')
+subdir('vfio-user')
subdir('virtio')
subdir('vmapple')
subdir('watchdog')
diff --git a/hw/vfio-user/Kconfig b/hw/vfio-user/Kconfig
new file mode 100644
index 0000000000..24bdf7af90
--- /dev/null
+++ b/hw/vfio-user/Kconfig
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# vfio-user support: enabled by default, requires VFIO_PCI.
+config VFIO_USER
+ bool
+ default y
+ depends on VFIO_PCI
+
diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
new file mode 100644
index 0000000000..b82c558252
--- /dev/null
+++ b/hw/vfio-user/meson.build
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# Sources of the vfio-user container and PCI device; they are added to
+# system_ss only when CONFIG_VFIO_USER is set.
+vfio_user_ss = ss.source_set()
+vfio_user_ss.add(files(
+ 'container.c',
+ 'pci.c',
+))
+
+system_ss.add_all(when: 'CONFIG_VFIO_USER', if_true: vfio_user_ss)
--
2.43.0
+Steve
On 6/25/25 21:29, John Levon wrote:
> Introduce basic plumbing for vfio-user with CONFIG_VFIO_USER.
>
> We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a
> container type for the "IOMMU" type "vfio-iommu-user", and share some
> common container code from hw/vfio/container.c.
>
> Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects,
> sharing some common code from hw/vfio/pci.c.
>
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> MAINTAINERS | 8 +
> hw/vfio-user/container.h | 21 +++
> include/hw/vfio/vfio-container-base.h | 1 +
> hw/vfio-user/container.c | 208 ++++++++++++++++++++++++++
> hw/vfio-user/pci.c | 185 +++++++++++++++++++++++
> hw/Kconfig | 1 +
> hw/meson.build | 1 +
> hw/vfio-user/Kconfig | 7 +
> hw/vfio-user/meson.build | 9 ++
> 9 files changed, 441 insertions(+)
> create mode 100644 hw/vfio-user/container.h
> create mode 100644 hw/vfio-user/container.c
> create mode 100644 hw/vfio-user/pci.c
> create mode 100644 hw/vfio-user/Kconfig
> create mode 100644 hw/vfio-user/meson.build
>
> diff --git a/MAINTAINERS b/MAINTAINERS
> index 27f4fe3f25..2369391004 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -4253,6 +4253,14 @@ F: hw/remote/iommu.c
> F: include/hw/remote/iommu.h
> F: tests/functional/test_multiprocess.py
>
> +VFIO-USER:
> +M: John Levon <john.levon@nutanix.com>
> +M: Thanos Makatos <thanos.makatos@nutanix.com>
> +S: Supported
> +F: hw/vfio-user/*
> +F: include/hw/vfio-user/*
> +F: subprojects/libvfio-user
> +
> EBPF:
> M: Jason Wang <jasowang@redhat.com>
> R: Andrew Melnychenko <andrew@daynix.com>
> diff --git a/hw/vfio-user/container.h b/hw/vfio-user/container.h
> new file mode 100644
> index 0000000000..e4a46d2c1b
> --- /dev/null
> +++ b/hw/vfio-user/container.h
> @@ -0,0 +1,21 @@
> +/*
> + * vfio-user specific definitions.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#ifndef HW_VFIO_USER_CONTAINER_H
> +#define HW_VFIO_USER_CONTAINER_H
> +
> +#include "qemu/osdep.h"
> +
> +#include "hw/vfio/vfio-container-base.h"
> +
> +/* MMU container sub-class for vfio-user. */
> +typedef struct VFIOUserContainer {
> + VFIOContainerBase bcontainer;
> +} VFIOUserContainer;
> +
> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserContainer, VFIO_IOMMU_USER);
> +
> +#endif /* HW_VFIO_USER_CONTAINER_H */
> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
> index f0232654ee..3cd86ec59e 100644
> --- a/include/hw/vfio/vfio-container-base.h
> +++ b/include/hw/vfio/vfio-container-base.h
> @@ -109,6 +109,7 @@ vfio_container_get_page_size_mask(const VFIOContainerBase *bcontainer)
> #define TYPE_VFIO_IOMMU_LEGACY TYPE_VFIO_IOMMU "-legacy"
> #define TYPE_VFIO_IOMMU_SPAPR TYPE_VFIO_IOMMU "-spapr"
> #define TYPE_VFIO_IOMMU_IOMMUFD TYPE_VFIO_IOMMU "-iommufd"
> +#define TYPE_VFIO_IOMMU_USER TYPE_VFIO_IOMMU "-user"
>
> OBJECT_DECLARE_TYPE(VFIOContainerBase, VFIOIOMMUClass, VFIO_IOMMU)
>
> diff --git a/hw/vfio-user/container.c b/hw/vfio-user/container.c
> new file mode 100644
> index 0000000000..2367332177
> --- /dev/null
> +++ b/hw/vfio-user/container.c
> @@ -0,0 +1,208 @@
> +/*
> + * Container for vfio-user IOMMU type: rather than communicating with the kernel
> + * vfio driver, we communicate over a socket to a server using the vfio-user
> + * protocol.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <sys/ioctl.h>
> +#include <linux/vfio.h>
> +#include "qemu/osdep.h"
> +
> +#include "hw/vfio-user/container.h"
> +#include "hw/vfio/vfio-cpr.h"
> +#include "hw/vfio/vfio-device.h"
> +#include "hw/vfio/vfio-listener.h"
> +#include "qapi/error.h"
> +
> +static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
> + hwaddr iova, ram_addr_t size,
> + IOMMUTLBEntry *iotlb, bool unmap_all)
> +{
> + return -ENOTSUP;
> +}
> +
> +static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
> + ram_addr_t size, void *vaddr, bool readonly,
> + MemoryRegion *mrp)
> +{
> + return -ENOTSUP;
> +}
> +
> +static int
> +vfio_user_set_dirty_page_tracking(const VFIOContainerBase *bcontainer,
> + bool start, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static int vfio_user_query_dirty_bitmap(const VFIOContainerBase *bcontainer,
> + VFIOBitmap *vbmap, hwaddr iova,
> + hwaddr size, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static bool vfio_user_setup(VFIOContainerBase *bcontainer, Error **errp)
> +{
> + error_setg_errno(errp, ENOTSUP, "Not supported");
> + return -ENOTSUP;
> +}
> +
> +static VFIOUserContainer *vfio_user_create_container(Error **errp)
> +{
> + VFIOUserContainer *container;
> +
> + container = VFIO_IOMMU_USER(object_new(TYPE_VFIO_IOMMU_USER));
> + return container;
> +}
> +
> +/*
> + * Try to mirror vfio_container_connect() as much as possible.
> + */
> +static VFIOUserContainer *
> +vfio_user_container_connect(AddressSpace *as, Error **errp)
> +{
> + VFIOContainerBase *bcontainer;
> + VFIOUserContainer *container;
> + VFIOAddressSpace *space;
> + VFIOIOMMUClass *vioc;
> +
> + space = vfio_address_space_get(as);
> +
> + container = vfio_user_create_container(errp);
> + if (!container) {
> + goto put_space_exit;
> + }
> +
> + bcontainer = &container->bcontainer;
> +
> + if (!vfio_cpr_register_container(bcontainer, errp)) {
> + goto free_container_exit;
> + }
I missed the CPR usage in vfio-user when reviewing and I think it should
be addressed in a separate series. This also interferes with Steve's "live
update" changes.
John,
Could you please drop the CPR registration?
Thanks,
C.
> +
> + vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
> + assert(vioc->setup);
> +
> + if (!vioc->setup(bcontainer, errp)) {
> + goto unregister_container_exit;
> + }
> +
> + vfio_address_space_insert(space, bcontainer);
> +
> + if (!vfio_listener_register(bcontainer, errp)) {
> + goto listener_release_exit;
> + }
> +
> + bcontainer->initialized = true;
> +
> + return container;
> +
> +listener_release_exit:
> + vfio_listener_unregister(bcontainer);
> + if (vioc->release) {
> + vioc->release(bcontainer);
> + }
> +
> +unregister_container_exit:
> + vfio_cpr_unregister_container(bcontainer);
> +
> +free_container_exit:
> + object_unref(container);
> +
> +put_space_exit:
> + vfio_address_space_put(space);
> +
> + return NULL;
> +}
> +
> +static void vfio_user_container_disconnect(VFIOUserContainer *container)
> +{
> + VFIOContainerBase *bcontainer = &container->bcontainer;
> + VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
> +
> + vfio_listener_unregister(bcontainer);
> + if (vioc->release) {
> + vioc->release(bcontainer);
> + }
> +
> + VFIOAddressSpace *space = bcontainer->space;
> +
> + vfio_cpr_unregister_container(bcontainer);
> + object_unref(container);
> +
> + vfio_address_space_put(space);
> +}
> +
> +static bool vfio_user_device_get(VFIOUserContainer *container,
> + VFIODevice *vbasedev, Error **errp)
> +{
> + struct vfio_device_info info = { 0 };
> +
> + vbasedev->fd = -1;
> +
> + vfio_device_prepare(vbasedev, &container->bcontainer, &info);
> +
> + return true;
> +}
> +
> +/*
> + * vfio_user_device_attach: attach a device to a new container.
> + */
> +static bool vfio_user_device_attach(const char *name, VFIODevice *vbasedev,
> + AddressSpace *as, Error **errp)
> +{
> + VFIOUserContainer *container;
> +
> + container = vfio_user_container_connect(as, errp);
> + if (container == NULL) {
> + error_prepend(errp, "failed to connect proxy");
> + return false;
> + }
> +
> + return vfio_user_device_get(container, vbasedev, errp);
> +}
> +
> +static void vfio_user_device_detach(VFIODevice *vbasedev)
> +{
> + VFIOUserContainer *container = container_of(vbasedev->bcontainer,
> + VFIOUserContainer, bcontainer);
> +
> + vfio_device_unprepare(vbasedev);
> +
> + vfio_user_container_disconnect(container);
> +}
> +
> +static int vfio_user_pci_hot_reset(VFIODevice *vbasedev, bool single)
> +{
> + /* ->needs_reset is always false for vfio-user. */
> + return 0;
> +}
> +
> +static void vfio_iommu_user_class_init(ObjectClass *klass, const void *data)
> +{
> + VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
> +
> + vioc->setup = vfio_user_setup;
> + vioc->dma_map = vfio_user_dma_map;
> + vioc->dma_unmap = vfio_user_dma_unmap;
> + vioc->attach_device = vfio_user_device_attach;
> + vioc->detach_device = vfio_user_device_detach;
> + vioc->set_dirty_page_tracking = vfio_user_set_dirty_page_tracking;
> + vioc->query_dirty_bitmap = vfio_user_query_dirty_bitmap;
> + vioc->pci_hot_reset = vfio_user_pci_hot_reset;
> +};
> +
> +static const TypeInfo types[] = {
> + {
> + .name = TYPE_VFIO_IOMMU_USER,
> + .parent = TYPE_VFIO_IOMMU,
> + .instance_size = sizeof(VFIOUserContainer),
> + .class_init = vfio_iommu_user_class_init,
> + },
> +};
> +
> +DEFINE_TYPES(types)
> diff --git a/hw/vfio-user/pci.c b/hw/vfio-user/pci.c
> new file mode 100644
> index 0000000000..86d7055747
> --- /dev/null
> +++ b/hw/vfio-user/pci.c
> @@ -0,0 +1,185 @@
> +/*
> + * vfio PCI device over a UNIX socket.
> + *
> + * Copyright © 2018, 2021 Oracle and/or its affiliates.
> + *
> + * SPDX-License-Identifier: GPL-2.0-or-later
> + */
> +
> +#include <sys/ioctl.h>
> +#include "qemu/osdep.h"
> +#include "qapi-visit-sockets.h"
> +
> +#include "hw/qdev-properties.h"
> +#include "hw/vfio/pci.h"
> +
> +#define TYPE_VFIO_USER_PCI "vfio-user-pci"
> +OBJECT_DECLARE_SIMPLE_TYPE(VFIOUserPCIDevice, VFIO_USER_PCI)
> +
> +struct VFIOUserPCIDevice {
> + VFIOPCIDevice device;
> + SocketAddress *socket;
> +};
> +
> +/*
> + * Emulated devices don't use host hot reset
> + */
> +static void vfio_user_compute_needs_reset(VFIODevice *vbasedev)
> +{
> + vbasedev->needs_reset = false;
> +}
> +
> +static Object *vfio_user_pci_get_object(VFIODevice *vbasedev)
> +{
> + VFIOUserPCIDevice *vdev = container_of(vbasedev, VFIOUserPCIDevice,
> + device.vbasedev);
> +
> + return OBJECT(vdev);
> +}
> +
> +static VFIODeviceOps vfio_user_pci_ops = {
> + .vfio_compute_needs_reset = vfio_user_compute_needs_reset,
> + .vfio_eoi = vfio_pci_intx_eoi,
> + .vfio_get_object = vfio_user_pci_get_object,
> + /* No live migration support yet. */
> + .vfio_save_config = NULL,
> + .vfio_load_config = NULL,
> +};
> +
> +static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
> +{
> + ERRP_GUARD();
> + VFIOUserPCIDevice *udev = VFIO_USER_PCI(pdev);
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> + const char *sock_name;
> + AddressSpace *as;
> +
> + if (!udev->socket) {
> + error_setg(errp, "No socket specified");
> + error_append_hint(errp, "e.g. -device '{"
> + "\"driver\":\"vfio-user-pci\", "
> + "\"socket\": {\"path\": \"/tmp/vfio-user.sock\", "
> + "\"type\": \"unix\"}'"
> + "}'\n");
> + return;
> + }
> +
> + sock_name = udev->socket->u.q_unix.path;
> +
> + vbasedev->name = g_strdup_printf("vfio-user:%s", sock_name);
> +
> + /*
> + * vfio-user devices are effectively mdevs (don't use a host iommu).
> + */
> + vbasedev->mdev = true;
> +
> + as = pci_device_iommu_address_space(pdev);
> + if (!vfio_device_attach_by_iommu_type(TYPE_VFIO_IOMMU_USER,
> + vbasedev->name, vbasedev,
> + as, errp)) {
> + error_prepend(errp, VFIO_MSG_PREFIX, vbasedev->name);
> + return;
> + }
> +}
> +
> +static void vfio_user_instance_init(Object *obj)
> +{
> + PCIDevice *pci_dev = PCI_DEVICE(obj);
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> + VFIODevice *vbasedev = &vdev->vbasedev;
> +
> + device_add_bootindex_property(obj, &vdev->bootindex,
> + "bootindex", NULL,
> + &pci_dev->qdev);
> + vdev->host.domain = ~0U;
> + vdev->host.bus = ~0U;
> + vdev->host.slot = ~0U;
> + vdev->host.function = ~0U;
> +
> + vfio_device_init(vbasedev, VFIO_DEVICE_TYPE_PCI, &vfio_user_pci_ops,
> + DEVICE(vdev), false);
> +
> + vdev->nv_gpudirect_clique = 0xFF;
> +
> + /*
> + * QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command
> + * line, therefore, no need to wait to realize like other devices.
> + */
> + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS;
> +}
> +
> +static void vfio_user_instance_finalize(Object *obj)
> +{
> + VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> +
> + vfio_pci_put_device(vdev);
> +}
> +
> +static const Property vfio_user_pci_dev_properties[] = {
> + DEFINE_PROP_UINT32("x-pci-vendor-id", VFIOPCIDevice,
> + vendor_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-device-id", VFIOPCIDevice,
> + device_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-sub-vendor-id", VFIOPCIDevice,
> + sub_vendor_id, PCI_ANY_ID),
> + DEFINE_PROP_UINT32("x-pci-sub-device-id", VFIOPCIDevice,
> + sub_device_id, PCI_ANY_ID),
> +};
> +
> +static void vfio_user_pci_set_socket(Object *obj, Visitor *v, const char *name,
> + void *opaque, Error **errp)
> +{
> + VFIOUserPCIDevice *udev = VFIO_USER_PCI(obj);
> + bool success;
> +
> + qapi_free_SocketAddress(udev->socket);
> +
> + udev->socket = NULL;
> +
> + success = visit_type_SocketAddress(v, name, &udev->socket, errp);
> +
> + if (!success) {
> + return;
> + }
> +
> + if (udev->socket->type != SOCKET_ADDRESS_TYPE_UNIX) {
> + error_setg(errp, "Unsupported socket type %s",
> + SocketAddressType_str(udev->socket->type));
> + qapi_free_SocketAddress(udev->socket);
> + udev->socket = NULL;
> + return;
> + }
> +}
> +
> +static void vfio_user_pci_dev_class_init(ObjectClass *klass, const void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(klass);
> + PCIDeviceClass *pdc = PCI_DEVICE_CLASS(klass);
> +
> + device_class_set_props(dc, vfio_user_pci_dev_properties);
> +
> + object_class_property_add(klass, "socket", "SocketAddress", NULL,
> + vfio_user_pci_set_socket, NULL, NULL);
> + object_class_property_set_description(klass, "socket",
> + "SocketAddress (UNIX sockets only)");
> +
> + dc->desc = "VFIO over socket PCI device assignment";
> + pdc->realize = vfio_user_pci_realize;
> +}
> +
> +static const TypeInfo vfio_user_pci_dev_info = {
> + .name = TYPE_VFIO_USER_PCI,
> + .parent = TYPE_VFIO_PCI_BASE,
> + .instance_size = sizeof(VFIOUserPCIDevice),
> + .class_init = vfio_user_pci_dev_class_init,
> + .instance_init = vfio_user_instance_init,
> + .instance_finalize = vfio_user_instance_finalize,
> +};
> +
> +static void register_vfio_user_dev_type(void)
> +{
> + type_register_static(&vfio_user_pci_dev_info);
> +}
> +
> + type_init(register_vfio_user_dev_type)
> diff --git a/hw/Kconfig b/hw/Kconfig
> index 9a86a6a28a..9e6c789ae7 100644
> --- a/hw/Kconfig
> +++ b/hw/Kconfig
> @@ -42,6 +42,7 @@ source ufs/Kconfig
> source usb/Kconfig
> source virtio/Kconfig
> source vfio/Kconfig
> +source vfio-user/Kconfig
> source vmapple/Kconfig
> source xen/Kconfig
> source watchdog/Kconfig
> diff --git a/hw/meson.build b/hw/meson.build
> index b91f761fe0..791ce21ab4 100644
> --- a/hw/meson.build
> +++ b/hw/meson.build
> @@ -39,6 +39,7 @@ subdir('uefi')
> subdir('ufs')
> subdir('usb')
> subdir('vfio')
> +subdir('vfio-user')
> subdir('virtio')
> subdir('vmapple')
> subdir('watchdog')
> diff --git a/hw/vfio-user/Kconfig b/hw/vfio-user/Kconfig
> new file mode 100644
> index 0000000000..24bdf7af90
> --- /dev/null
> +++ b/hw/vfio-user/Kconfig
> @@ -0,0 +1,7 @@
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +config VFIO_USER
> + bool
> + default y
> + depends on VFIO_PCI
> +
> diff --git a/hw/vfio-user/meson.build b/hw/vfio-user/meson.build
> new file mode 100644
> index 0000000000..b82c558252
> --- /dev/null
> +++ b/hw/vfio-user/meson.build
> @@ -0,0 +1,9 @@
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +vfio_user_ss = ss.source_set()
> +vfio_user_ss.add(files(
> + 'container.c',
> + 'pci.c',
> +))
> +
> +system_ss.add_all(when: 'CONFIG_VFIO_USER', if_true: vfio_user_ss)
On 6/25/25 21:29, John Levon wrote:
> Introduce basic plumbing for vfio-user with CONFIG_VFIO_USER.
>
> We introduce VFIOUserContainer in hw/vfio-user/container.c, which is a
> container type for the "IOMMU" type "vfio-iommu-user", and share some
> common container code from hw/vfio/container.c.
>
> Add hw/vfio-user/pci.c for instantiating VFIOUserPCIDevice objects,
> sharing some common code from hw/vfio/pci.c.
>
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>

Reviewed-by: Cédric Le Goater <clg@redhat.com>

Thanks,

C.
© 2016 - 2025 Red Hat, Inc.