Used for communication with VFIO driver
(prep work for vfio-user, which will communicate over a socket)
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
---
hw/vfio/common.c | 126 ++++++++++++++++++++++++++++--------------
include/hw/vfio/vfio-common.h | 33 +++++++++++
2 files changed, 117 insertions(+), 42 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index ace9562..83d69b9 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
goto unmap_exit;
}
- ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+ ret = CONT_DMA_UNMAP(container, unmap, bitmap);
if (!ret) {
cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
iotlb->translated_addr, pages);
} else {
- error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
+ error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %s", strerror(-ret));
}
g_free(bitmap->data);
@@ -465,30 +465,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
}
- while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
- /*
- * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
- * v4.15) where an overflow in its wrap-around check prevents us from
- * unmapping the last page of the address space. Test for the error
- * condition and re-try the unmap excluding the last page. The
- * expectation is that we've never mapped the last page anyway and this
- * unmap request comes via vIOMMU support which also makes it unlikely
- * that this page is used. This bug was introduced well after type1 v2
- * support was introduced, so we shouldn't need to test for v1. A fix
- * is queued for kernel v5.0 so this workaround can be removed once
- * affected kernels are sufficiently deprecated.
- */
- if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
- container->iommu_type == VFIO_TYPE1v2_IOMMU) {
- trace_vfio_dma_unmap_overflow_workaround();
- unmap.size -= 1ULL << ctz64(container->pgsizes);
- continue;
- }
- error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
- return -errno;
- }
-
- return 0;
+ return CONT_DMA_UNMAP(container, &unmap, NULL);
}
static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
@@ -501,24 +478,18 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
.iova = iova,
.size = size,
};
+ int ret;
if (!readonly) {
map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
}
- /*
- * Try the mapping, if it fails with EBUSY, unmap the region and try
- * again. This shouldn't be necessary, but we sometimes see it in
- * the VGA ROM space.
- */
- if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
- (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
- ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
- return 0;
- }
+ ret = CONT_DMA_MAP(container, &map);
- error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
- return -errno;
+ if (ret < 0) {
+ error_report("VFIO_MAP_DMA failed: %s", strerror(-ret));
+ }
+ return ret;
}
static void vfio_host_win_add(VFIOContainer *container,
@@ -1263,10 +1234,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
}
- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
+ ret = CONT_DIRTY_BITMAP(container, &dirty, NULL);
if (ret) {
error_report("Failed to set dirty tracking flag 0x%x errno: %d",
- dirty.flags, errno);
+ dirty.flags, -ret);
}
}
@@ -1316,11 +1287,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
goto err_out;
}
- ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
+ ret = CONT_DIRTY_BITMAP(container, dbitmap, range);
if (ret) {
error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
" size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
- (uint64_t)range->size, errno);
+ (uint64_t)range->size, -ret);
goto err_out;
}
@@ -2090,6 +2061,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
container->error = NULL;
container->dirty_pages_supported = false;
container->dma_max_mappings = 0;
+ container->io_ops = &vfio_cont_io_ioctl;
QLIST_INIT(&container->giommu_list);
QLIST_INIT(&container->hostwin_list);
QLIST_INIT(&container->vrdl_list);
@@ -2626,3 +2598,73 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
}
return vfio_eeh_container_op(container, op);
}
+
+/*
+ * Traditional ioctl() based io_ops
+ */
+
+static int vfio_io_dma_map(VFIOContainer *container,
+ struct vfio_iommu_type1_dma_map *map)
+{
+
+ /*
+ * Try the mapping, if it fails with EBUSY, unmap the region and try
+ * again. This shouldn't be necessary, but we sometimes see it in
+ * the VGA ROM space.
+ */
+ if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0 ||
+ (errno == EBUSY &&
+ vfio_dma_unmap(container, map->iova, map->size, NULL) == 0 &&
+ ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0)) {
+ return 0;
+ }
+ return -errno;
+}
+
+static int vfio_io_dma_unmap(VFIOContainer *container,
+ struct vfio_iommu_type1_dma_unmap *unmap,
+ struct vfio_bitmap *bitmap)
+{
+
+ while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap)) {
+ /*
+ * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
+ * v4.15) where an overflow in its wrap-around check prevents us from
+ * unmapping the last page of the address space. Test for the error
+ * condition and re-try the unmap excluding the last page. The
+ * expectation is that we've never mapped the last page anyway and this
+ * unmap request comes via vIOMMU support which also makes it unlikely
+ * that this page is used. This bug was introduced well after type1 v2
+ * support was introduced, so we shouldn't need to test for v1. A fix
+ * is queued for kernel v5.0 so this workaround can be removed once
+ * affected kernels are sufficiently deprecated.
+ */
+ if (errno == EINVAL && unmap->size && !(unmap->iova + unmap->size) &&
+ container->iommu_type == VFIO_TYPE1v2_IOMMU) {
+ trace_vfio_dma_unmap_overflow_workaround();
+ unmap->size -= 1ULL << ctz64(container->pgsizes);
+ continue;
+ }
+ error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
+ return -errno;
+ }
+
+ return 0;
+}
+
+static int vfio_io_dirty_bitmap(VFIOContainer *container,
+ struct vfio_iommu_type1_dirty_bitmap *bitmap,
+ struct vfio_iommu_type1_dirty_bitmap_get *range)
+{
+ int ret;
+
+ ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, bitmap);
+
+ return ret < 0 ? -errno : ret;
+}
+
+VFIOContIO vfio_cont_io_ioctl = {
+ .dma_map = vfio_io_dma_map,
+ .dma_unmap = vfio_io_dma_unmap,
+ .dirty_bitmap = vfio_io_dirty_bitmap,
+};
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index e573f5a..6fd40f1 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
} VFIOAddressSpace;
struct VFIOGroup;
+typedef struct VFIOContIO VFIOContIO;
typedef struct VFIOContainer {
VFIOAddressSpace *space;
@@ -83,6 +84,7 @@ typedef struct VFIOContainer {
MemoryListener prereg_listener;
unsigned iommu_type;
Error *error;
+ VFIOContIO *io_ops;
bool initialized;
bool dirty_pages_supported;
uint64_t dirty_pgsizes;
@@ -154,6 +156,37 @@ struct VFIODeviceOps {
int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
};
+#ifdef CONFIG_LINUX
+
+/*
+ * The next 2 ops vectors are how Devices and Containers
+ * communicate with the server. The default option is
+ * through ioctl() to the kernel VFIO driver, but vfio-user
+ * can use a socket to a remote process.
+ */
+
+struct VFIOContIO {
+ int (*dma_map)(VFIOContainer *container,
+ struct vfio_iommu_type1_dma_map *map);
+ int (*dma_unmap)(VFIOContainer *container,
+ struct vfio_iommu_type1_dma_unmap *unmap,
+ struct vfio_bitmap *bitmap);
+ int (*dirty_bitmap)(VFIOContainer *container,
+ struct vfio_iommu_type1_dirty_bitmap *bitmap,
+ struct vfio_iommu_type1_dirty_bitmap_get *range);
+};
+
+#define CONT_DMA_MAP(cont, map) \
+ ((cont)->io_ops->dma_map((cont), (map)))
+#define CONT_DMA_UNMAP(cont, unmap, bitmap) \
+ ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap)))
+#define CONT_DIRTY_BITMAP(cont, bitmap, range) \
+ ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
+
+extern VFIOContIO vfio_cont_io_ioctl;
+
+#endif /* CONFIG_LINUX */
+
typedef struct VFIOGroup {
int fd;
int groupid;
--
1.8.3.1
Hello John,
On 11/9/22 00:13, John Johnson wrote:
> Used for communication with VFIO driver
> (prep work for vfio-user, which will communicate over a socket)
>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> ---
> hw/vfio/common.c | 126 ++++++++++++++++++++++++++++--------------
> include/hw/vfio/vfio-common.h | 33 +++++++++++
> 2 files changed, 117 insertions(+), 42 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index ace9562..83d69b9 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
> goto unmap_exit;
> }
>
> - ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
> + ret = CONT_DMA_UNMAP(container, unmap, bitmap);
I am not sure these macros are very useful, compared to:
container->ops->dma_unmap(container, unmap, bitmap);
> if (!ret) {
> cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
> iotlb->translated_addr, pages);
> } else {
> - error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
> + error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %s", strerror(-ret));
> }
>
> g_free(bitmap->data);
> @@ -465,30 +465,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
> return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
> }
>
> - while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
> - /*
> - * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> - * v4.15) where an overflow in its wrap-around check prevents us from
> - * unmapping the last page of the address space. Test for the error
> - * condition and re-try the unmap excluding the last page. The
> - * expectation is that we've never mapped the last page anyway and this
> - * unmap request comes via vIOMMU support which also makes it unlikely
> - * that this page is used. This bug was introduced well after type1 v2
> - * support was introduced, so we shouldn't need to test for v1. A fix
> - * is queued for kernel v5.0 so this workaround can be removed once
> - * affected kernels are sufficiently deprecated.
> - */
> - if (errno == EINVAL && unmap.size && !(unmap.iova + unmap.size) &&
> - container->iommu_type == VFIO_TYPE1v2_IOMMU) {
> - trace_vfio_dma_unmap_overflow_workaround();
> - unmap.size -= 1ULL << ctz64(container->pgsizes);
> - continue;
> - }
> - error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
> - return -errno;
> - }
> -
> - return 0;
> + return CONT_DMA_UNMAP(container, &unmap, NULL);
> }
>
> static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> @@ -501,24 +478,18 @@ static int vfio_dma_map(VFIOContainer *container, hwaddr iova,
> .iova = iova,
> .size = size,
> };
> + int ret;
>
> if (!readonly) {
> map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
> }
>
> - /*
> - * Try the mapping, if it fails with EBUSY, unmap the region and try
> - * again. This shouldn't be necessary, but we sometimes see it in
> - * the VGA ROM space.
> - */
> - if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
> - (errno == EBUSY && vfio_dma_unmap(container, iova, size, NULL) == 0 &&
> - ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
> - return 0;
> - }
> + ret = CONT_DMA_MAP(container, &map);
>
> - error_report("VFIO_MAP_DMA failed: %s", strerror(errno));
> - return -errno;
> + if (ret < 0) {
> + error_report("VFIO_MAP_DMA failed: %s", strerror(-ret));
> + }
> + return ret;
> }
>
> static void vfio_host_win_add(VFIOContainer *container,
> @@ -1263,10 +1234,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
> dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_STOP;
> }
>
> - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, &dirty);
> + ret = CONT_DIRTY_BITMAP(container, &dirty, NULL);
> if (ret) {
> error_report("Failed to set dirty tracking flag 0x%x errno: %d",
> - dirty.flags, errno);
> + dirty.flags, -ret);
> }
> }
>
> @@ -1316,11 +1287,11 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
> goto err_out;
> }
>
> - ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
> + ret = CONT_DIRTY_BITMAP(container, dbitmap, range);
> if (ret) {
> error_report("Failed to get dirty bitmap for iova: 0x%"PRIx64
> " size: 0x%"PRIx64" err: %d", (uint64_t)range->iova,
> - (uint64_t)range->size, errno);
> + (uint64_t)range->size, -ret);
> goto err_out;
> }
>
> @@ -2090,6 +2061,7 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
> container->error = NULL;
> container->dirty_pages_supported = false;
> container->dma_max_mappings = 0;
> + container->io_ops = &vfio_cont_io_ioctl;
> QLIST_INIT(&container->giommu_list);
> QLIST_INIT(&container->hostwin_list);
> QLIST_INIT(&container->vrdl_list);
> @@ -2626,3 +2598,73 @@ int vfio_eeh_as_op(AddressSpace *as, uint32_t op)
> }
> return vfio_eeh_container_op(container, op);
> }
> +
> +/*
> + * Traditional ioctl() based io_ops
> + */
> +
> +static int vfio_io_dma_map(VFIOContainer *container,
> + struct vfio_iommu_type1_dma_map *map)
> +{
> +
> + /*
> + * Try the mapping, if it fails with EBUSY, unmap the region and try
> + * again. This shouldn't be necessary, but we sometimes see it in
> + * the VGA ROM space.
> + */
> + if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0 ||
> + (errno == EBUSY &&
> + vfio_dma_unmap(container, map->iova, map->size, NULL) == 0 &&
> + ioctl(container->fd, VFIO_IOMMU_MAP_DMA, map) == 0)) {
> + return 0;
> + }
> + return -errno;
> +}
> +
> +static int vfio_io_dma_unmap(VFIOContainer *container,
> + struct vfio_iommu_type1_dma_unmap *unmap,
> + struct vfio_bitmap *bitmap)
> +{
> +
> + while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap)) {
> + /*
> + * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> + * v4.15) where an overflow in its wrap-around check prevents us from
> + * unmapping the last page of the address space. Test for the error
> + * condition and re-try the unmap excluding the last page. The
> + * expectation is that we've never mapped the last page anyway and this
> + * unmap request comes via vIOMMU support which also makes it unlikely
> + * that this page is used. This bug was introduced well after type1 v2
> + * support was introduced, so we shouldn't need to test for v1. A fix
> + * is queued for kernel v5.0 so this workaround can be removed once
> + * affected kernels are sufficiently deprecated.
> + */
> + if (errno == EINVAL && unmap->size && !(unmap->iova + unmap->size) &&
> + container->iommu_type == VFIO_TYPE1v2_IOMMU) {
> + trace_vfio_dma_unmap_overflow_workaround();
> + unmap->size -= 1ULL << ctz64(container->pgsizes);
> + continue;
> + }
> + error_report("VFIO_UNMAP_DMA failed: %s", strerror(errno));
> + return -errno;
> + }
> +
> + return 0;
> +}
> +
> +static int vfio_io_dirty_bitmap(VFIOContainer *container,
> + struct vfio_iommu_type1_dirty_bitmap *bitmap,
> + struct vfio_iommu_type1_dirty_bitmap_get *range)
> +{
> + int ret;
> +
> + ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, bitmap);
> +
> + return ret < 0 ? -errno : ret;
> +}
> +
> +VFIOContIO vfio_cont_io_ioctl = {
> + .dma_map = vfio_io_dma_map,
> + .dma_unmap = vfio_io_dma_unmap,
> + .dirty_bitmap = vfio_io_dirty_bitmap,
> +};
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index e573f5a..6fd40f1 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -75,6 +75,7 @@ typedef struct VFIOAddressSpace {
> } VFIOAddressSpace;
>
> struct VFIOGroup;
> +typedef struct VFIOContIO VFIOContIO;
>
> typedef struct VFIOContainer {
> VFIOAddressSpace *space;
> @@ -83,6 +84,7 @@ typedef struct VFIOContainer {
> MemoryListener prereg_listener;
> unsigned iommu_type;
> Error *error;
> + VFIOContIO *io_ops;
ops should be enough.
> bool initialized;
> bool dirty_pages_supported;
> uint64_t dirty_pgsizes;
> @@ -154,6 +156,37 @@ struct VFIODeviceOps {
> int (*vfio_load_config)(VFIODevice *vdev, QEMUFile *f);
> };
>
> +#ifdef CONFIG_LINUX
> +
> +/*
> + * The next 2 ops vectors are how Devices and Containers
> + * communicate with the server. The default option is
> + * through ioctl() to the kernel VFIO driver, but vfio-user
> + * can use a socket to a remote process.
> + */
> +
> +struct VFIOContIO {
VFIOContainerOps seems more adequate with the current VFIO terminology
in QEMU.
Thanks,
C.
> + int (*dma_map)(VFIOContainer *container,
> + struct vfio_iommu_type1_dma_map *map);
> + int (*dma_unmap)(VFIOContainer *container,
> + struct vfio_iommu_type1_dma_unmap *unmap,
> + struct vfio_bitmap *bitmap);
> + int (*dirty_bitmap)(VFIOContainer *container,
> + struct vfio_iommu_type1_dirty_bitmap *bitmap,
> + struct vfio_iommu_type1_dirty_bitmap_get *range);
> +};
> +
> +#define CONT_DMA_MAP(cont, map) \
> + ((cont)->io_ops->dma_map((cont), (map)))
> +#define CONT_DMA_UNMAP(cont, unmap, bitmap) \
> + ((cont)->io_ops->dma_unmap((cont), (unmap), (bitmap)))
> +#define CONT_DIRTY_BITMAP(cont, bitmap, range) \
> + ((cont)->io_ops->dirty_bitmap((cont), (bitmap), (range)))
> +
> +extern VFIOContIO vfio_cont_io_ioctl;
> +
> +#endif /* CONFIG_LINUX */
> +
> typedef struct VFIOGroup {
> int fd;
> int groupid;
On 9/12/22 17:10, Cédric Le Goater wrote:
> Hello John,
>
> On 11/9/22 00:13, John Johnson wrote:
>> Used for communication with VFIO driver
>> (prep work for vfio-user, which will communicate over a socket)
>>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> ---
>> hw/vfio/common.c | 126
>> ++++++++++++++++++++++++++++--------------
>> include/hw/vfio/vfio-common.h | 33 +++++++++++
>> 2 files changed, 117 insertions(+), 42 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index ace9562..83d69b9 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -432,12 +432,12 @@ static int vfio_dma_unmap_bitmap(VFIOContainer
>> *container,
>> goto unmap_exit;
>> }
>> - ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
>> + ret = CONT_DMA_UNMAP(container, unmap, bitmap);
>
> I am not sure these macros are very useful, compared to :
>
> container->ops->dma_unmap(container, unmap, bitmap);
I was going to report the same.
>> +/*
>> + * The next 2 ops vectors are how Devices and Containers
>> + * communicate with the server. The default option is
>> + * through ioctl() to the kernel VFIO driver, but vfio-user
>> + * can use a socket to a remote process.
>> + */
>> +
>> +struct VFIOContIO {
>
> VFIOContainerOps seems more adequate with the current VFIO terminology
> in QEMU.
Yes please, abbreviated "Cont" is not helpful.
On Tue, Nov 08, 2022 at 03:13:25PM -0800, John Johnson wrote:
> Used for communication with VFIO driver
> (prep work for vfio-user, which will communicate over a socket)

> index e573f5a..6fd40f1 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h

> +
> +extern VFIOContIO vfio_cont_io_ioctl;

Nit, there's no need for this to be non-static, it's only used in
hw/vfio/common.c

regards
john
© 2016 - 2026 Red Hat, Inc.