For vfio-user, each region has its own fd rather than sharing
vbasedev's. Add the necessary plumbing to support this, and use the
correct fd in vfio_region_mmap().
Signed-off-by: John Levon <john.levon@nutanix.com>
---
include/hw/vfio/vfio-device.h | 7 +++++--
hw/vfio/device.c | 29 +++++++++++++++++++++++++----
hw/vfio/region.c | 9 +++++++--
3 files changed, 37 insertions(+), 8 deletions(-)
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 923f9cd116..5cb817fd6a 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -66,6 +66,7 @@ typedef struct VFIODevice {
OnOffAuto enable_migration;
OnOffAuto migration_multifd_transfer;
bool migration_events;
+ bool use_region_fds;
VFIODeviceOps *ops;
VFIODeviceIOOps *io_ops;
unsigned int num_irqs;
@@ -84,6 +85,7 @@ typedef struct VFIODevice {
VFIOIOASHwpt *hwpt;
QLIST_ENTRY(VFIODevice) hwpt_next;
struct vfio_region_info **reginfo;
+ int *region_fds;
} VFIODevice;
struct VFIODeviceOps {
@@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
/**
* @get_region_info
*
- * Fill in @info with information on the region given by @info->index.
+ * Fill in @info (and optionally @fd) with information on the region given
+ * by @info->index.
*/
int (*get_region_info)(VFIODevice *vdev,
- struct vfio_region_info *info);
+ struct vfio_region_info *info, int *fd);
/**
* @get_irq_info
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index d0068086ae..29a8d72deb 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -226,6 +226,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info)
{
size_t argsz = sizeof(struct vfio_region_info);
+ int fd = -1;
int ret;
/* check cache */
@@ -240,7 +241,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
retry:
(*info)->argsz = argsz;
- ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
+ ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
if (ret != 0) {
g_free(*info);
*info = NULL;
@@ -251,11 +252,19 @@ retry:
argsz = (*info)->argsz;
*info = g_realloc(*info, argsz);
+ if (fd != -1) {
+ close(fd);
+ fd = -1;
+ }
+
goto retry;
}
/* fill cache */
vbasedev->reginfo[index] = *info;
+ if (vbasedev->region_fds != NULL) {
+ vbasedev->region_fds[index] = fd;
+ }
return 0;
}
@@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
vbasedev->io_ops = &vfio_device_io_ops_ioctl;
vbasedev->dev = dev;
vbasedev->fd = -1;
+ vbasedev->use_region_fds = false;
vbasedev->ram_block_discard_allowed = ram_discard;
}
@@ -470,6 +480,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
vbasedev->reginfo = g_new0(struct vfio_region_info *,
vbasedev->num_regions);
+ if (vbasedev->use_region_fds) {
+ vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
+ }
}
void vfio_device_unprepare(VFIODevice *vbasedev)
@@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
for (i = 0; i < vbasedev->num_regions; i++) {
g_free(vbasedev->reginfo[i]);
+ if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
+ close(vbasedev->region_fds[i]);
+ }
+
}
- g_free(vbasedev->reginfo);
- vbasedev->reginfo = NULL;
+
+ g_clear_pointer(&vbasedev->reginfo, g_free);
+ g_clear_pointer(&vbasedev->region_fds, g_free);
QLIST_REMOVE(vbasedev, container_next);
QLIST_REMOVE(vbasedev, global_next);
@@ -502,10 +520,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
}
static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
- struct vfio_region_info *info)
+ struct vfio_region_info *info,
+ int *fd)
{
int ret;
+ *fd = -1;
+
ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
return ret < 0 ? -errno : ret;
diff --git a/hw/vfio/region.c b/hw/vfio/region.c
index 34752c3f65..cb172f2136 100644
--- a/hw/vfio/region.c
+++ b/hw/vfio/region.c
@@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
{
int i, ret, prot = 0;
char *name;
+ int fd;
if (!region->mem) {
return 0;
@@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
goto no_mmap;
}
+ /* Use the per-region fd if set, or the shared fd. */
+ fd = region->vbasedev->region_fds ?
+ region->vbasedev->region_fds[region->nr] :
+ region->vbasedev->fd,
+
map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
munmap(map_base, map_align - map_base);
munmap(map_align + region->mmaps[i].size,
align - (map_align - map_base));
region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
- MAP_SHARED | MAP_FIXED,
- region->vbasedev->fd,
+ MAP_SHARED | MAP_FIXED, fd,
region->fd_offset +
region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) {
--
2.43.0
On 6/7/25 02:10, John Levon wrote:
> For vfio-user, each region has its own fd rather than sharing
> vbasedev's. Add the necessary plumbing to support this, and use the
> correct fd in vfio_region_mmap().
>
> Signed-off-by: John Levon <john.levon@nutanix.com>
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Applied to vfio-next.
Thanks,
C.
> ---
> include/hw/vfio/vfio-device.h | 7 +++++--
> hw/vfio/device.c | 29 +++++++++++++++++++++++++----
> hw/vfio/region.c | 9 +++++++--
> 3 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 923f9cd116..5cb817fd6a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -66,6 +66,7 @@ typedef struct VFIODevice {
> OnOffAuto enable_migration;
> OnOffAuto migration_multifd_transfer;
> bool migration_events;
> + bool use_region_fds;
> VFIODeviceOps *ops;
> VFIODeviceIOOps *io_ops;
> unsigned int num_irqs;
> @@ -84,6 +85,7 @@ typedef struct VFIODevice {
> VFIOIOASHwpt *hwpt;
> QLIST_ENTRY(VFIODevice) hwpt_next;
> struct vfio_region_info **reginfo;
> + int *region_fds;
> } VFIODevice;
>
> struct VFIODeviceOps {
> @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
> /**
> * @get_region_info
> *
> - * Fill in @info with information on the region given by @info->index.
> + * Fill in @info (and optionally @fd) with information on the region given
> + * by @info->index.
> */
> int (*get_region_info)(VFIODevice *vdev,
> - struct vfio_region_info *info);
> + struct vfio_region_info *info, int *fd);
>
> /**
> * @get_irq_info
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index d0068086ae..29a8d72deb 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -226,6 +226,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
> struct vfio_region_info **info)
> {
> size_t argsz = sizeof(struct vfio_region_info);
> + int fd = -1;
> int ret;
>
> /* check cache */
> @@ -240,7 +241,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
> retry:
> (*info)->argsz = argsz;
>
> - ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
> + ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
> if (ret != 0) {
> g_free(*info);
> *info = NULL;
> @@ -251,11 +252,19 @@ retry:
> argsz = (*info)->argsz;
> *info = g_realloc(*info, argsz);
>
> + if (fd != -1) {
> + close(fd);
> + fd = -1;
> + }
> +
> goto retry;
> }
>
> /* fill cache */
> vbasedev->reginfo[index] = *info;
> + if (vbasedev->region_fds != NULL) {
> + vbasedev->region_fds[index] = fd;
> + }
>
> return 0;
> }
> @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
> vbasedev->io_ops = &vfio_device_io_ops_ioctl;
> vbasedev->dev = dev;
> vbasedev->fd = -1;
> + vbasedev->use_region_fds = false;
>
> vbasedev->ram_block_discard_allowed = ram_discard;
> }
> @@ -470,6 +480,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
>
> vbasedev->reginfo = g_new0(struct vfio_region_info *,
> vbasedev->num_regions);
> + if (vbasedev->use_region_fds) {
> + vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
> + }
> }
>
> void vfio_device_unprepare(VFIODevice *vbasedev)
> @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>
> for (i = 0; i < vbasedev->num_regions; i++) {
> g_free(vbasedev->reginfo[i]);
> + if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> + close(vbasedev->region_fds[i]);
> + }
> +
> }
> - g_free(vbasedev->reginfo);
> - vbasedev->reginfo = NULL;
> +
> + g_clear_pointer(&vbasedev->reginfo, g_free);
> + g_clear_pointer(&vbasedev->region_fds, g_free);
>
> QLIST_REMOVE(vbasedev, container_next);
> QLIST_REMOVE(vbasedev, global_next);
> @@ -502,10 +520,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
> }
>
> static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
> - struct vfio_region_info *info)
> + struct vfio_region_info *info,
> + int *fd)
> {
> int ret;
>
> + *fd = -1;
> +
> ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
>
> return ret < 0 ? -errno : ret;
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index 34752c3f65..cb172f2136 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
> {
> int i, ret, prot = 0;
> char *name;
> + int fd;
>
> if (!region->mem) {
> return 0;
> @@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
> goto no_mmap;
> }
>
> + /* Use the per-region fd if set, or the shared fd. */
> + fd = region->vbasedev->region_fds ?
> + region->vbasedev->region_fds[region->nr] :
> + region->vbasedev->fd,
> +
> map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
> munmap(map_base, map_align - map_base);
> munmap(map_align + region->mmaps[i].size,
> align - (map_align - map_base));
>
> region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
> - MAP_SHARED | MAP_FIXED,
> - region->vbasedev->fd,
> + MAP_SHARED | MAP_FIXED, fd,
> region->fd_offset +
> region->mmaps[i].offset);
> if (region->mmaps[i].mmap == MAP_FAILED) {
On 07/06/2025 01:10, John Levon wrote:
> For vfio-user, each region has its own fd rather than sharing
> vbasedev's. Add the necessary plumbing to support this, and use the
> correct fd in vfio_region_mmap().
>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> include/hw/vfio/vfio-device.h | 7 +++++--
> hw/vfio/device.c | 29 +++++++++++++++++++++++++----
> hw/vfio/region.c | 9 +++++++--
> 3 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 923f9cd116..5cb817fd6a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -66,6 +66,7 @@ typedef struct VFIODevice {
> OnOffAuto enable_migration;
> OnOffAuto migration_multifd_transfer;
> bool migration_events;
> + bool use_region_fds;
> VFIODeviceOps *ops;
> VFIODeviceIOOps *io_ops;
> unsigned int num_irqs;
> @@ -84,6 +85,7 @@ typedef struct VFIODevice {
> VFIOIOASHwpt *hwpt;
> QLIST_ENTRY(VFIODevice) hwpt_next;
> struct vfio_region_info **reginfo;
> + int *region_fds;
> } VFIODevice;
>
> struct VFIODeviceOps {
> @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
> /**
> * @get_region_info
> *
> - * Fill in @info with information on the region given by @info->index.
> + * Fill in @info (and optionally @fd) with information on the region given
> + * by @info->index.
> */
> int (*get_region_info)(VFIODevice *vdev,
> - struct vfio_region_info *info);
> + struct vfio_region_info *info, int *fd);
>
> /**
> * @get_irq_info
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index d0068086ae..29a8d72deb 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -226,6 +226,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
> struct vfio_region_info **info)
> {
> size_t argsz = sizeof(struct vfio_region_info);
> + int fd = -1;
> int ret;
>
> /* check cache */
> @@ -240,7 +241,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
> retry:
> (*info)->argsz = argsz;
>
> - ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
> + ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
> if (ret != 0) {
> g_free(*info);
> *info = NULL;
> @@ -251,11 +252,19 @@ retry:
> argsz = (*info)->argsz;
> *info = g_realloc(*info, argsz);
>
> + if (fd != -1) {
> + close(fd);
> + fd = -1;
> + }
> +
> goto retry;
> }
>
> /* fill cache */
> vbasedev->reginfo[index] = *info;
> + if (vbasedev->region_fds != NULL) {
> + vbasedev->region_fds[index] = fd;
> + }
>
> return 0;
> }
> @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
> vbasedev->io_ops = &vfio_device_io_ops_ioctl;
> vbasedev->dev = dev;
> vbasedev->fd = -1;
> + vbasedev->use_region_fds = false;
>
> vbasedev->ram_block_discard_allowed = ram_discard;
> }
> @@ -470,6 +480,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
>
> vbasedev->reginfo = g_new0(struct vfio_region_info *,
> vbasedev->num_regions);
> + if (vbasedev->use_region_fds) {
> + vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
> + }
> }
>
> void vfio_device_unprepare(VFIODevice *vbasedev)
> @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>
> for (i = 0; i < vbasedev->num_regions; i++) {
> g_free(vbasedev->reginfo[i]);
> + if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> + close(vbasedev->region_fds[i]);
> + }
> +
> }
> - g_free(vbasedev->reginfo);
> - vbasedev->reginfo = NULL;
> +
> + g_clear_pointer(&vbasedev->reginfo, g_free);
> + g_clear_pointer(&vbasedev->region_fds, g_free);
>
> QLIST_REMOVE(vbasedev, container_next);
> QLIST_REMOVE(vbasedev, global_next);
> @@ -502,10 +520,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
> }
>
> static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
> - struct vfio_region_info *info)
> + struct vfio_region_info *info,
> + int *fd)
> {
> int ret;
>
> + *fd = -1;
> +
> ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
>
> return ret < 0 ? -errno : ret;
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index 34752c3f65..cb172f2136 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
> {
> int i, ret, prot = 0;
> char *name;
> + int fd;
>
> if (!region->mem) {
> return 0;
> @@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
> goto no_mmap;
> }
>
> + /* Use the per-region fd if set, or the shared fd. */
> + fd = region->vbasedev->region_fds ?
> + region->vbasedev->region_fds[region->nr] :
> + region->vbasedev->fd,
> +
This feels a bit odd: if you're asking to map a VFIORegion then
shouldn't that contain all the information required for the mapping,
including the fd?
(goes and looks)
It appears that there is already a fd field in VFIORegion so is there a
reason why we can't use this instead of adding region_fds to vbasedev?
> map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
> munmap(map_base, map_align - map_base);
> munmap(map_align + region->mmaps[i].size,
> align - (map_align - map_base));
>
> region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
> - MAP_SHARED | MAP_FIXED,
> - region->vbasedev->fd,
> + MAP_SHARED | MAP_FIXED, fd,
> region->fd_offset +
> region->mmaps[i].offset);
> if (region->mmaps[i].mmap == MAP_FAILED) {
ATB,
Mark.
On Tue, Jun 10, 2025 at 11:58:18AM +0100, Mark Cave-Ayland wrote: > > + /* Use the per-region fd if set, or the shared fd. */ > > + fd = region->vbasedev->region_fds ? > > + region->vbasedev->region_fds[region->nr] : > > + region->vbasedev->fd, > > + > > This feels a bit odd: if you're asking to map a VFIORegion then shouldn't > that contain all the information required for the mapping, including the fd? > > (goes and looks) > > It appears that there is already a fd field in VFIORegion so is there a > reason why we can't use this instead of adding region_fds to vbasedev? region->fd was in earlier patchsets, but dropped as requested by Cédric. So we must look it up here. regards john
On 6/10/25 13:27, John Levon wrote: > On Tue, Jun 10, 2025 at 11:58:18AM +0100, Mark Cave-Ayland wrote: > >>> + /* Use the per-region fd if set, or the shared fd. */ >>> + fd = region->vbasedev->region_fds ? >>> + region->vbasedev->region_fds[region->nr] : >>> + region->vbasedev->fd, >>> + >> >> This feels a bit odd: if you're asking to map a VFIORegion then shouldn't >> that contain all the information required for the mapping, including the fd? >> >> (goes and looks) >> >> It appears that there is already a fd field in VFIORegion so is there a >> reason why we can't use this instead of adding region_fds to vbasedev? > > region->fd was in earlier patchsets, but dropped as requested by Cédric. So we > must look it up here. yes. It didn't seem necessary to store the same information under multiple structs. C.
On Tue, Jun 10, 2025 at 01:57:23PM +0200, Cédric Le Goater wrote: > On 6/10/25 13:27, John Levon wrote: > > On Tue, Jun 10, 2025 at 11:58:18AM +0100, Mark Cave-Ayland wrote: > > > > > > + /* Use the per-region fd if set, or the shared fd. */ > > > > + fd = region->vbasedev->region_fds ? > > > > + region->vbasedev->region_fds[region->nr] : > > > > + region->vbasedev->fd, > > > > + > > > > > > This feels a bit odd: if you're asking to map a VFIORegion then shouldn't > > > that contain all the information required for the mapping, including the fd? > > > > > > (goes and looks) > > > > > > It appears that there is already a fd field in VFIORegion so is there a > > > reason why we can't use this instead of adding region_fds to vbasedev? > > > > region->fd was in earlier patchsets, but dropped as requested by Cédric. So we > > must look it up here. > > yes. It didn't seem necessary to store the same information under > multiple structs. And the reason we have ->region_fds at all rather than just storing directly in region->fd is that vfio_device_get_region_info() doesn't take a VFIORegion argument, as it's used in other contexts too. regards john
On 6/7/25 02:10, John Levon wrote:
> For vfio-user, each region has its own fd rather than sharing
> vbasedev's. Add the necessary plumbing to support this, and use the
> correct fd in vfio_region_mmap().
>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> include/hw/vfio/vfio-device.h | 7 +++++--
> hw/vfio/device.c | 29 +++++++++++++++++++++++++----
> hw/vfio/region.c | 9 +++++++--
> 3 files changed, 37 insertions(+), 8 deletions(-)
>
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 923f9cd116..5cb817fd6a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -66,6 +66,7 @@ typedef struct VFIODevice {
> OnOffAuto enable_migration;
> OnOffAuto migration_multifd_transfer;
> bool migration_events;
> + bool use_region_fds;
> VFIODeviceOps *ops;
> VFIODeviceIOOps *io_ops;
> unsigned int num_irqs;
> @@ -84,6 +85,7 @@ typedef struct VFIODevice {
> VFIOIOASHwpt *hwpt;
> QLIST_ENTRY(VFIODevice) hwpt_next;
> struct vfio_region_info **reginfo;
> + int *region_fds;
> } VFIODevice;
>
> struct VFIODeviceOps {
> @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
> /**
> * @get_region_info
> *
> - * Fill in @info with information on the region given by @info->index.
> + * Fill in @info (and optionally @fd) with information on the region given
> + * by @info->index.
> */
Could you please update the whole documentation of the VFIODeviceIOOps
struct and document each parameter ?
> int (*get_region_info)(VFIODevice *vdev,
> - struct vfio_region_info *info);
> + struct vfio_region_info *info, int *fd);
>
> /**
> * @get_irq_info
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index d0068086ae..29a8d72deb 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -226,6 +226,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
> struct vfio_region_info **info)
> {
> size_t argsz = sizeof(struct vfio_region_info);
> + int fd = -1;
> int ret;
>
> /* check cache */
> @@ -240,7 +241,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
> retry:
> (*info)->argsz = argsz;
>
> - ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
> + ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
> if (ret != 0) {
> g_free(*info);
> *info = NULL;
> @@ -251,11 +252,19 @@ retry:
> argsz = (*info)->argsz;
> *info = g_realloc(*info, argsz);
>
> + if (fd != -1) {
> + close(fd);
> + fd = -1;
> + }
> +
> goto retry;
> }
>
> /* fill cache */
> vbasedev->reginfo[index] = *info;
> + if (vbasedev->region_fds != NULL) {
> + vbasedev->region_fds[index] = fd;
> + }
>
> return 0;
> }
> @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
> vbasedev->io_ops = &vfio_device_io_ops_ioctl;
> vbasedev->dev = dev;
> vbasedev->fd = -1;
> + vbasedev->use_region_fds = false;
why not extend vfio_device_init() with a 'region_fd_cache' bool
parameter instead ? and avoid the extra VFIODevice attribute.
>
> vbasedev->ram_block_discard_allowed = ram_discard;
> }
> @@ -470,6 +480,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
>
> vbasedev->reginfo = g_new0(struct vfio_region_info *,
> vbasedev->num_regions);
> + if (vbasedev->use_region_fds) {
> + vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
> + }
> }
>
> void vfio_device_unprepare(VFIODevice *vbasedev)
> @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>
> for (i = 0; i < vbasedev->num_regions; i++) {
> g_free(vbasedev->reginfo[i]);
> + if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> + close(vbasedev->region_fds[i]);
> + }
> +
Adding a small helper to retrieve the region fd from the cache,
would hide some of the implementation details here and below in
vfio_region_mmap()
> }
> - g_free(vbasedev->reginfo);
> - vbasedev->reginfo = NULL;
> +
> + g_clear_pointer(&vbasedev->reginfo, g_free);
unrelated change.
Thanks,
C.
> + g_clear_pointer(&vbasedev->region_fds, g_free);
>
> QLIST_REMOVE(vbasedev, container_next);
> QLIST_REMOVE(vbasedev, global_next);
> @@ -502,10 +520,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
> }
>
> static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
> - struct vfio_region_info *info)
> + struct vfio_region_info *info,
> + int *fd)
> {
> int ret;
>
> + *fd = -1;
> +
> ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
>
> return ret < 0 ? -errno : ret;
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index 34752c3f65..cb172f2136 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
> {
> int i, ret, prot = 0;
> char *name;
> + int fd;
>
> if (!region->mem) {
> return 0;
> @@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
> goto no_mmap;
> }
>
> + /* Use the per-region fd if set, or the shared fd. */
> + fd = region->vbasedev->region_fds ?
> + region->vbasedev->region_fds[region->nr] :
> + region->vbasedev->fd,
> +
> map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
> munmap(map_base, map_align - map_base);
> munmap(map_align + region->mmaps[i].size,
> align - (map_align - map_base));
>
> region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
> - MAP_SHARED | MAP_FIXED,
> - region->vbasedev->fd,
> + MAP_SHARED | MAP_FIXED, fd,
> region->fd_offset +
> region->mmaps[i].offset);
> if (region->mmaps[i].mmap == MAP_FAILED) {
On Tue, Jun 10, 2025 at 09:42:41AM +0200, Cédric Le Goater wrote:
> > @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
> > for (i = 0; i < vbasedev->num_regions; i++) {
> > g_free(vbasedev->reginfo[i]);
> > + if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> > + close(vbasedev->region_fds[i]);
> > + }
>
> Adding a small helper to retrieve the region fd from the cache,
> would hide some of the implementation details here and below in
> vfio_region_mmap()
I've added vfio_device_get_region_fd() - used by vfio_region_mmap(); now the
existence of ->region_fds is private to device.c, so I think that's better.
regards
john
On Tue, Jun 10, 2025 at 09:42:41AM +0200, Cédric Le Goater wrote:
> > @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
> > for (i = 0; i < vbasedev->num_regions; i++) {
> > g_free(vbasedev->reginfo[i]);
> > + if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> > + close(vbasedev->region_fds[i]);
> > + }
> > +
>
> Adding a small helper to retrieve the region fd from the cache,
> would hide some of the implementation details here and below in
> vfio_region_mmap()
I can add a region_get_fd() helper, but I can't use it here - we should only
close the fd if it's a per-region fd. I'll make this call a region_cleanup() or
similar.
regards
john
On Tue, Jun 10, 2025 at 09:42:41AM +0200, Cédric Le Goater wrote:
> On 6/7/25 02:10, John Levon wrote:
> > For vfio-user, each region has its own fd rather than sharing
> > vbasedev's. Add the necessary plumbing to support this, and use the
> > correct fd in vfio_region_mmap().
> >
> > @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
> > /**
> > * @get_region_info
> > *
> > - * Fill in @info with information on the region given by @info->index.
> > + * Fill in @info (and optionally @fd) with information on the region given
> > + * by @info->index.
> > */
>
> Could you please update the whole documentation of the VFIODeviceIOOps
> struct and document each parameter ?
Sorry, not sure what you're asking for. This struct was fully documented in
38bf025d ("vfio: add device IO ops vector")
and its subsequent patches.
> > @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
> > vbasedev->io_ops = &vfio_device_io_ops_ioctl;
> > vbasedev->dev = dev;
> > vbasedev->fd = -1;
> > + vbasedev->use_region_fds = false;
>
> why not extend vfio_device_init() with a 'region_fd_cache' bool
> parameter instead ?
I could do, but you previously asked me not to add an "io_ops" parameter to this
function and instead directly change it here - why is this parameter different?
> and avoid the extra VFIODevice attribute.
I don't get this - we still need to record the boolean in the vbasedev.
regards
john
On 6/10/25 14:34, John Levon wrote:
> On Tue, Jun 10, 2025 at 09:42:41AM +0200, Cédric Le Goater wrote:
>
>> On 6/7/25 02:10, John Levon wrote:
>>> For vfio-user, each region has its own fd rather than sharing
>>> vbasedev's. Add the necessary plumbing to support this, and use the
>>> correct fd in vfio_region_mmap().
>>>
>>> @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
>>> /**
>>> * @get_region_info
>>> *
>>> - * Fill in @info with information on the region given by @info->index.
>>> + * Fill in @info (and optionally @fd) with information on the region given
>>> + * by @info->index.
>>> */
>>
>> Could you please update the whole documentation of the VFIODeviceIOOps
>> struct and document each parameter ?
>
> Sorry, not sure what you're asking for. This struct was fully documented in
> 38bf025d ("vfio: add device IO ops vector")
> and its subsequent patches.
yes. See the formatting of VFIODeviceOps :
/**
* @vfio_save_config
*
* Save device config state
*
* @vdev: #VFIODevice for which to save the config
* @f: #QEMUFile where to send the data
* @errp: pointer to Error*, to store an error if it happens.
*
* Returns zero to indicate success and negative for error
*/
int (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f, Error **errp);
No big deal. It can come later.
>>> @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
>>> vbasedev->io_ops = &vfio_device_io_ops_ioctl;
>>> vbasedev->dev = dev;
>>> vbasedev->fd = -1;
>>> + vbasedev->use_region_fds = false;
>>
>> why not extend vfio_device_init() with a 'region_fd_cache' bool
>> parameter instead ?
>
> I could do, but you previously asked me not to add an "io_ops" parameter to this
> function and instead directly change it here - why is this parameter different?
Because I thought we could allocate vbasedev->region_fds[] directly under
vfio_device_init().
>> and avoid the extra VFIODevice attribute.
>
> I don't get this - we still need to record the boolean in the vbasedev.
but we can't because we don't have the vbasedev->num_regions value yet.
So forget this proposal.
That said, there are a few contortions around the instance_init()
handler, vfio_device_init() and the realize() handler that I find
confusing. I will see if it can be improved when the code is stable
again.
Thanks,
C.
© 2016 - 2025 Red Hat, Inc.