From: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
include/hw/vfio/vfio-common.h | 3 ++
hw/vfio/common.c | 84 +++++++++++++++++++++++++++++++++++
hw/vfio/pci.c | 22 +++++++++
3 files changed, 109 insertions(+)
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index bdd25a546c..688660c28d 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -91,6 +91,7 @@ typedef struct VFIOContainer {
uint64_t max_dirty_bitmap_size;
unsigned long pgsizes;
unsigned int dma_max_mappings;
+ VFIOProxy *proxy;
QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
QLIST_HEAD(, VFIOGroup) group_list;
@@ -217,6 +218,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
void vfio_put_group(VFIOGroup *group);
int vfio_get_device(VFIOGroup *group, const char *name,
VFIODevice *vbasedev, Error **errp);
+void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as);
+void vfio_disconnect_proxy(VFIOGroup *group);
extern const MemoryRegionOps vfio_region_ops;
typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 9fe3e05dc6..57b9e111e6 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -2249,6 +2249,55 @@ put_space_exit:
return ret;
}
+void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as)
+{
+ VFIOAddressSpace *space;
+ VFIOContainer *container;
+
+ if (QLIST_EMPTY(&vfio_group_list)) {
+ qemu_register_reset(vfio_reset_handler, NULL);
+ }
+
+ QLIST_INSERT_HEAD(&vfio_group_list, group, next);
+
+ /*
+ * try to mirror vfio_connect_container()
+ * as much as possible
+ */
+
+ space = vfio_get_address_space(as);
+
+ container = g_malloc0(sizeof(*container));
+ container->space = space;
+ container->fd = -1;
+ QLIST_INIT(&container->giommu_list);
+ QLIST_INIT(&container->hostwin_list);
+ container->proxy = proxy;
+
+ /*
+ * The proxy uses a SW IOMMU in lieu of the HW one
+ * used in the ioctl() version. Use TYPE1 with the
+ * target's page size for maximum compatibility
+ */
+ container->iommu_type = VFIO_TYPE1_IOMMU;
+ vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE);
+ container->pgsizes = TARGET_PAGE_SIZE;
+
+ container->dirty_pages_supported = true;
+ container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER;
+ container->dirty_pgsizes = TARGET_PAGE_SIZE;
+
+ QLIST_INIT(&container->group_list);
+ QLIST_INSERT_HEAD(&space->containers, container, next);
+
+ group->container = container;
+ QLIST_INSERT_HEAD(&container->group_list, group, container_next);
+
+ container->listener = vfio_memory_listener;
+ memory_listener_register(&container->listener, container->space->as);
+ container->initialized = true;
+}
+
static void vfio_disconnect_container(VFIOGroup *group)
{
VFIOContainer *container = group->container;
@@ -2291,6 +2340,41 @@ static void vfio_disconnect_container(VFIOGroup *group)
}
}
+void vfio_disconnect_proxy(VFIOGroup *group)
+{
+ VFIOContainer *container = group->container;
+ VFIOAddressSpace *space = container->space;
+ VFIOGuestIOMMU *giommu, *tmp;
+
+ /*
+ * try to mirror vfio_disconnect_container()
+ * as much as possible, knowing each device
+ * is in one group and one container
+ */
+
+ QLIST_REMOVE(group, container_next);
+ group->container = NULL;
+
+ /*
+ * Explicitly release the listener first before unset container,
+ * since unset may destroy the backend container if it's the last
+ * group.
+ */
+ memory_listener_unregister(&container->listener);
+
+ QLIST_REMOVE(container, next);
+
+ QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
+ memory_region_unregister_iommu_notifier(
+ MEMORY_REGION(giommu->iommu), &giommu->n);
+ QLIST_REMOVE(giommu, giommu_next);
+ g_free(giommu);
+ }
+
+ g_free(container);
+ vfio_put_address_space(space);
+}
+
VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
{
VFIOGroup *group;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 282de6a30b..2c9fcb2fa9 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3442,6 +3442,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
VFIODevice *vbasedev = &vdev->vbasedev;
SocketAddress addr;
VFIOProxy *proxy;
+ VFIOGroup *group = NULL;
int ret;
Error *err = NULL;
@@ -3484,6 +3485,19 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
vbasedev->no_mmap = false;
vbasedev->ops = &vfio_user_pci_ops;
+ /*
+ * each device gets its own group and container
+ * make them unrelated to any host IOMMU groupings
+ */
+ group = g_malloc0(sizeof(*group));
+ group->fd = -1;
+ group->groupid = -1;
+ QLIST_INIT(&group->device_list);
+ QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
+ vbasedev->group = group;
+
+ vfio_connect_proxy(proxy, group, pci_device_iommu_address_space(pdev));
+
ret = vfio_user_get_info(&vdev->vbasedev);
if (ret) {
error_setg_errno(errp, -ret, "get info failure");
@@ -3587,6 +3601,9 @@ out_teardown:
vfio_teardown_msi(vdev);
vfio_bars_exit(vdev);
error:
+ if (group != NULL) {
+ vfio_disconnect_proxy(group);
+ }
vfio_user_disconnect(proxy);
error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
}
@@ -3595,6 +3612,11 @@ static void vfio_user_instance_finalize(Object *obj)
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
VFIODevice *vbasedev = &vdev->vbasedev;
+ VFIOGroup *group = vbasedev->group;
+
+ vfio_disconnect_proxy(group);
+ g_free(group);
+ vbasedev->group = NULL;
vfio_put_device(vdev);
--
2.25.1
On Mon, Aug 16, 2021 at 09:42:45AM -0700, Elena Ufimtseva wrote:
> From: John Johnson <john.g.johnson@oracle.com>
>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> ---
> include/hw/vfio/vfio-common.h | 3 ++
> hw/vfio/common.c | 84 +++++++++++++++++++++++++++++++++++
> hw/vfio/pci.c | 22 +++++++++
> 3 files changed, 109 insertions(+)
Alex: I'm not familiar enough with hw/vfio/ to review this in depth. You
might have suggestions on how to unify the vfio-user and vfio kernel
concepts of groups and containers.
>
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index bdd25a546c..688660c28d 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -91,6 +91,7 @@ typedef struct VFIOContainer {
> uint64_t max_dirty_bitmap_size;
> unsigned long pgsizes;
> unsigned int dma_max_mappings;
> + VFIOProxy *proxy;
> QLIST_HEAD(, VFIOGuestIOMMU) giommu_list;
> QLIST_HEAD(, VFIOHostDMAWindow) hostwin_list;
> QLIST_HEAD(, VFIOGroup) group_list;
> @@ -217,6 +218,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp);
> void vfio_put_group(VFIOGroup *group);
> int vfio_get_device(VFIOGroup *group, const char *name,
> VFIODevice *vbasedev, Error **errp);
> +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as);
> +void vfio_disconnect_proxy(VFIOGroup *group);
>
> extern const MemoryRegionOps vfio_region_ops;
> typedef QLIST_HEAD(VFIOGroupList, VFIOGroup) VFIOGroupList;
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 9fe3e05dc6..57b9e111e6 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -2249,6 +2249,55 @@ put_space_exit:
> return ret;
> }
>
> +void vfio_connect_proxy(VFIOProxy *proxy, VFIOGroup *group, AddressSpace *as)
> +{
> + VFIOAddressSpace *space;
> + VFIOContainer *container;
> +
> + if (QLIST_EMPTY(&vfio_group_list)) {
> + qemu_register_reset(vfio_reset_handler, NULL);
> + }
> +
> + QLIST_INSERT_HEAD(&vfio_group_list, group, next);
> +
> + /*
> + * try to mirror vfio_connect_container()
> + * as much as possible
> + */
> +
> + space = vfio_get_address_space(as);
> +
> + container = g_malloc0(sizeof(*container));
> + container->space = space;
> + container->fd = -1;
> + QLIST_INIT(&container->giommu_list);
> + QLIST_INIT(&container->hostwin_list);
> + container->proxy = proxy;
> +
> + /*
> + * The proxy uses a SW IOMMU in lieu of the HW one
> + * used in the ioctl() version. Use TYPE1 with the
> + * target's page size for maximum compatibility
> + */
> + container->iommu_type = VFIO_TYPE1_IOMMU;
> + vfio_host_win_add(container, 0, (hwaddr)-1, TARGET_PAGE_SIZE);
> + container->pgsizes = TARGET_PAGE_SIZE;
> +
> + container->dirty_pages_supported = true;
> + container->max_dirty_bitmap_size = VFIO_USER_DEF_MAX_XFER;
> + container->dirty_pgsizes = TARGET_PAGE_SIZE;
> +
> + QLIST_INIT(&container->group_list);
> + QLIST_INSERT_HEAD(&space->containers, container, next);
> +
> + group->container = container;
> + QLIST_INSERT_HEAD(&container->group_list, group, container_next);
> +
> + container->listener = vfio_memory_listener;
> + memory_listener_register(&container->listener, container->space->as);
> + container->initialized = true;
> +}
> +
> static void vfio_disconnect_container(VFIOGroup *group)
> {
> VFIOContainer *container = group->container;
> @@ -2291,6 +2340,41 @@ static void vfio_disconnect_container(VFIOGroup *group)
> }
> }
>
> +void vfio_disconnect_proxy(VFIOGroup *group)
> +{
> + VFIOContainer *container = group->container;
> + VFIOAddressSpace *space = container->space;
> + VFIOGuestIOMMU *giommu, *tmp;
> +
> + /*
> + * try to mirror vfio_disconnect_container()
> + * as much as possible, knowing each device
> + * is in one group and one container
> + */
> +
> + QLIST_REMOVE(group, container_next);
> + group->container = NULL;
> +
> + /*
> + * Explicitly release the listener first before unset container,
> + * since unset may destroy the backend container if it's the last
> + * group.
> + */
> + memory_listener_unregister(&container->listener);
> +
> + QLIST_REMOVE(container, next);
> +
> + QLIST_FOREACH_SAFE(giommu, &container->giommu_list, giommu_next, tmp) {
> + memory_region_unregister_iommu_notifier(
> + MEMORY_REGION(giommu->iommu), &giommu->n);
> + QLIST_REMOVE(giommu, giommu_next);
> + g_free(giommu);
> + }
> +
> + g_free(container);
> + vfio_put_address_space(space);
> +}
> +
> VFIOGroup *vfio_get_group(int groupid, AddressSpace *as, Error **errp)
> {
> VFIOGroup *group;
> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
> index 282de6a30b..2c9fcb2fa9 100644
> --- a/hw/vfio/pci.c
> +++ b/hw/vfio/pci.c
> @@ -3442,6 +3442,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
> VFIODevice *vbasedev = &vdev->vbasedev;
> SocketAddress addr;
> VFIOProxy *proxy;
> + VFIOGroup *group = NULL;
> int ret;
> Error *err = NULL;
>
> @@ -3484,6 +3485,19 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error **errp)
> vbasedev->no_mmap = false;
> vbasedev->ops = &vfio_user_pci_ops;
>
> + /*
> + * each device gets its own group and container
> + * make them unrelated to any host IOMMU groupings
> + */
> + group = g_malloc0(sizeof(*group));
> + group->fd = -1;
> + group->groupid = -1;
> + QLIST_INIT(&group->device_list);
> + QLIST_INSERT_HEAD(&group->device_list, vbasedev, next);
> + vbasedev->group = group;
> +
> + vfio_connect_proxy(proxy, group, pci_device_iommu_address_space(pdev));
> +
> ret = vfio_user_get_info(&vdev->vbasedev);
> if (ret) {
> error_setg_errno(errp, -ret, "get info failure");
> @@ -3587,6 +3601,9 @@ out_teardown:
> vfio_teardown_msi(vdev);
> vfio_bars_exit(vdev);
> error:
> + if (group != NULL) {
> + vfio_disconnect_proxy(group);
> + }
> vfio_user_disconnect(proxy);
> error_prepend(errp, VFIO_MSG_PREFIX, vdev->vbasedev.name);
> }
> @@ -3595,6 +3612,11 @@ static void vfio_user_instance_finalize(Object *obj)
> {
> VFIOPCIDevice *vdev = VFIO_PCI_BASE(obj);
> VFIODevice *vbasedev = &vdev->vbasedev;
> + VFIOGroup *group = vbasedev->group;
> +
> + vfio_disconnect_proxy(group);
> + g_free(group);
> + vbasedev->group = NULL;
Can vfio_put_group() be used instead? I'm worried that the cleanup code
will be duplicated or become inconsistent if it's not shared.
Also, vfio_instance_finalize() calls vfio_put_group() after
vfio_put_device(). Does this code intentionally take advantage of the if
(!vbasedev->group) early return in vfio_put_base_device()? This is
non-obvious. I recommend unifying the device and group cleanup instead
of special-casing it here (this is fragile!).
>
> vfio_put_device(vdev);
>
> --
> 2.25.1
>
© 2016 - 2026 Red Hat, Inc.