[PATCH v3 03/23] vfio: add per-region fd support

John Levon posted 23 patches 5 months, 1 week ago
Maintainers: John Levon <john.levon@nutanix.com>, Thanos Makatos <thanos.makatos@nutanix.com>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>, Paolo Bonzini <pbonzini@redhat.com>, "Marc-André Lureau" <marcandre.lureau@redhat.com>, "Daniel P. Berrangé" <berrange@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>
There is a newer version of this series
[PATCH v3 03/23] vfio: add per-region fd support
Posted by John Levon 5 months, 1 week ago
For vfio-user, each region has its own fd rather than sharing
vbasedev's. Add the necessary plumbing to support this, and use the
correct fd in vfio_region_mmap().

Signed-off-by: John Levon <john.levon@nutanix.com>
---
 include/hw/vfio/vfio-device.h |  7 +++++--
 hw/vfio/device.c              | 29 +++++++++++++++++++++++++----
 hw/vfio/region.c              |  9 +++++++--
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 923f9cd116..5cb817fd6a 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -66,6 +66,7 @@ typedef struct VFIODevice {
     OnOffAuto enable_migration;
     OnOffAuto migration_multifd_transfer;
     bool migration_events;
+    bool use_region_fds;
     VFIODeviceOps *ops;
     VFIODeviceIOOps *io_ops;
     unsigned int num_irqs;
@@ -84,6 +85,7 @@ typedef struct VFIODevice {
     VFIOIOASHwpt *hwpt;
     QLIST_ENTRY(VFIODevice) hwpt_next;
     struct vfio_region_info **reginfo;
+    int *region_fds;
 } VFIODevice;
 
 struct VFIODeviceOps {
@@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
     /**
      * @get_region_info
      *
-     * Fill in @info with information on the region given by @info->index.
+     * Fill in @info (and optionally @fd) with information on the region given
+     * by @info->index.
      */
     int (*get_region_info)(VFIODevice *vdev,
-                           struct vfio_region_info *info);
+                           struct vfio_region_info *info, int *fd);
 
     /**
      * @get_irq_info
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index d0068086ae..29a8d72deb 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -226,6 +226,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
                                 struct vfio_region_info **info)
 {
     size_t argsz = sizeof(struct vfio_region_info);
+    int fd = -1;
     int ret;
 
     /* check cache */
@@ -240,7 +241,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
 retry:
     (*info)->argsz = argsz;
 
-    ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
+    ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
     if (ret != 0) {
         g_free(*info);
         *info = NULL;
@@ -251,11 +252,19 @@ retry:
         argsz = (*info)->argsz;
         *info = g_realloc(*info, argsz);
 
+        if (fd != -1) {
+            close(fd);
+            fd = -1;
+        }
+
         goto retry;
     }
 
     /* fill cache */
     vbasedev->reginfo[index] = *info;
+    if (vbasedev->region_fds != NULL) {
+        vbasedev->region_fds[index] = fd;
+    }
 
     return 0;
 }
@@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
     vbasedev->io_ops = &vfio_device_io_ops_ioctl;
     vbasedev->dev = dev;
     vbasedev->fd = -1;
+    vbasedev->use_region_fds = false;
 
     vbasedev->ram_block_discard_allowed = ram_discard;
 }
@@ -470,6 +480,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
 
     vbasedev->reginfo = g_new0(struct vfio_region_info *,
                                vbasedev->num_regions);
+    if (vbasedev->use_region_fds) {
+        vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
+    }
 }
 
 void vfio_device_unprepare(VFIODevice *vbasedev)
@@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
 
     for (i = 0; i < vbasedev->num_regions; i++) {
         g_free(vbasedev->reginfo[i]);
+        if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
+            close(vbasedev->region_fds[i]);
+        }
+
     }
-    g_free(vbasedev->reginfo);
-    vbasedev->reginfo = NULL;
+
+    g_clear_pointer(&vbasedev->reginfo, g_free);
+    g_clear_pointer(&vbasedev->region_fds, g_free);
 
     QLIST_REMOVE(vbasedev, container_next);
     QLIST_REMOVE(vbasedev, global_next);
@@ -502,10 +520,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
 }
 
 static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
-                                          struct vfio_region_info *info)
+                                          struct vfio_region_info *info,
+                                          int *fd)
 {
     int ret;
 
+    *fd = -1;
+
     ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
 
     return ret < 0 ? -errno : ret;
diff --git a/hw/vfio/region.c b/hw/vfio/region.c
index 34752c3f65..cb172f2136 100644
--- a/hw/vfio/region.c
+++ b/hw/vfio/region.c
@@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
 {
     int i, ret, prot = 0;
     char *name;
+    int fd;
 
     if (!region->mem) {
         return 0;
@@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
             goto no_mmap;
         }
 
+        /* Use the per-region fd if set, or the shared fd. */
+        fd = region->vbasedev->region_fds ?
+             region->vbasedev->region_fds[region->nr] :
+             region->vbasedev->fd,
+
         map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
         munmap(map_base, map_align - map_base);
         munmap(map_align + region->mmaps[i].size,
                align - (map_align - map_base));
 
         region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
-                                     MAP_SHARED | MAP_FIXED,
-                                     region->vbasedev->fd,
+                                     MAP_SHARED | MAP_FIXED, fd,
                                      region->fd_offset +
                                      region->mmaps[i].offset);
         if (region->mmaps[i].mmap == MAP_FAILED) {
-- 
2.43.0
Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by Cédric Le Goater 5 months, 1 week ago
On 6/7/25 02:10, John Levon wrote:
> For vfio-user, each region has its own fd rather than sharing
> vbasedev's. Add the necessary plumbing to support this, and use the
> correct fd in vfio_region_mmap().
> 
> Signed-off-by: John Levon <john.levon@nutanix.com>


Reviewed-by: Cédric Le Goater <clg@redhat.com>

Applied to vfio-next.

Thanks,

C.


> ---
>   include/hw/vfio/vfio-device.h |  7 +++++--
>   hw/vfio/device.c              | 29 +++++++++++++++++++++++++----
>   hw/vfio/region.c              |  9 +++++++--
>   3 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 923f9cd116..5cb817fd6a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -66,6 +66,7 @@ typedef struct VFIODevice {
>       OnOffAuto enable_migration;
>       OnOffAuto migration_multifd_transfer;
>       bool migration_events;
> +    bool use_region_fds;
>       VFIODeviceOps *ops;
>       VFIODeviceIOOps *io_ops;
>       unsigned int num_irqs;
> @@ -84,6 +85,7 @@ typedef struct VFIODevice {
>       VFIOIOASHwpt *hwpt;
>       QLIST_ENTRY(VFIODevice) hwpt_next;
>       struct vfio_region_info **reginfo;
> +    int *region_fds;
>   } VFIODevice;
>   
>   struct VFIODeviceOps {
> @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
>       /**
>        * @get_region_info
>        *
> -     * Fill in @info with information on the region given by @info->index.
> +     * Fill in @info (and optionally @fd) with information on the region given
> +     * by @info->index.
>        */
>       int (*get_region_info)(VFIODevice *vdev,
> -                           struct vfio_region_info *info);
> +                           struct vfio_region_info *info, int *fd);
>   
>       /**
>        * @get_irq_info
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index d0068086ae..29a8d72deb 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -226,6 +226,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>                                   struct vfio_region_info **info)
>   {
>       size_t argsz = sizeof(struct vfio_region_info);
> +    int fd = -1;
>       int ret;
>   
>       /* check cache */
> @@ -240,7 +241,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>   retry:
>       (*info)->argsz = argsz;
>   
> -    ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
> +    ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
>       if (ret != 0) {
>           g_free(*info);
>           *info = NULL;
> @@ -251,11 +252,19 @@ retry:
>           argsz = (*info)->argsz;
>           *info = g_realloc(*info, argsz);
>   
> +        if (fd != -1) {
> +            close(fd);
> +            fd = -1;
> +        }
> +
>           goto retry;
>       }
>   
>       /* fill cache */
>       vbasedev->reginfo[index] = *info;
> +    if (vbasedev->region_fds != NULL) {
> +        vbasedev->region_fds[index] = fd;
> +    }
>   
>       return 0;
>   }
> @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
>       vbasedev->io_ops = &vfio_device_io_ops_ioctl;
>       vbasedev->dev = dev;
>       vbasedev->fd = -1;
> +    vbasedev->use_region_fds = false;
>   
>       vbasedev->ram_block_discard_allowed = ram_discard;
>   }
> @@ -470,6 +480,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
>   
>       vbasedev->reginfo = g_new0(struct vfio_region_info *,
>                                  vbasedev->num_regions);
> +    if (vbasedev->use_region_fds) {
> +        vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
> +    }
>   }
>   
>   void vfio_device_unprepare(VFIODevice *vbasedev)
> @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>   
>       for (i = 0; i < vbasedev->num_regions; i++) {
>           g_free(vbasedev->reginfo[i]);
> +        if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> +            close(vbasedev->region_fds[i]);
> +        }
> +
>       }
> -    g_free(vbasedev->reginfo);
> -    vbasedev->reginfo = NULL;
> +
> +    g_clear_pointer(&vbasedev->reginfo, g_free);
> +    g_clear_pointer(&vbasedev->region_fds, g_free);
>   
>       QLIST_REMOVE(vbasedev, container_next);
>       QLIST_REMOVE(vbasedev, global_next);
> @@ -502,10 +520,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
>   }
>   
>   static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
> -                                          struct vfio_region_info *info)
> +                                          struct vfio_region_info *info,
> +                                          int *fd)
>   {
>       int ret;
>   
> +    *fd = -1;
> +
>       ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
>   
>       return ret < 0 ? -errno : ret;
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index 34752c3f65..cb172f2136 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
>   {
>       int i, ret, prot = 0;
>       char *name;
> +    int fd;
>   
>       if (!region->mem) {
>           return 0;
> @@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
>               goto no_mmap;
>           }
>   
> +        /* Use the per-region fd if set, or the shared fd. */
> +        fd = region->vbasedev->region_fds ?
> +             region->vbasedev->region_fds[region->nr] :
> +             region->vbasedev->fd,
> +
>           map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
>           munmap(map_base, map_align - map_base);
>           munmap(map_align + region->mmaps[i].size,
>                  align - (map_align - map_base));
>   
>           region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
> -                                     MAP_SHARED | MAP_FIXED,
> -                                     region->vbasedev->fd,
> +                                     MAP_SHARED | MAP_FIXED, fd,
>                                        region->fd_offset +
>                                        region->mmaps[i].offset);
>           if (region->mmaps[i].mmap == MAP_FAILED) {


Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by Mark Cave-Ayland 5 months, 1 week ago
On 07/06/2025 01:10, John Levon wrote:

> For vfio-user, each region has its own fd rather than sharing
> vbasedev's. Add the necessary plumbing to support this, and use the
> correct fd in vfio_region_mmap().
> 
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
>   include/hw/vfio/vfio-device.h |  7 +++++--
>   hw/vfio/device.c              | 29 +++++++++++++++++++++++++----
>   hw/vfio/region.c              |  9 +++++++--
>   3 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 923f9cd116..5cb817fd6a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -66,6 +66,7 @@ typedef struct VFIODevice {
>       OnOffAuto enable_migration;
>       OnOffAuto migration_multifd_transfer;
>       bool migration_events;
> +    bool use_region_fds;
>       VFIODeviceOps *ops;
>       VFIODeviceIOOps *io_ops;
>       unsigned int num_irqs;
> @@ -84,6 +85,7 @@ typedef struct VFIODevice {
>       VFIOIOASHwpt *hwpt;
>       QLIST_ENTRY(VFIODevice) hwpt_next;
>       struct vfio_region_info **reginfo;
> +    int *region_fds;
>   } VFIODevice;
>   
>   struct VFIODeviceOps {
> @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
>       /**
>        * @get_region_info
>        *
> -     * Fill in @info with information on the region given by @info->index.
> +     * Fill in @info (and optionally @fd) with information on the region given
> +     * by @info->index.
>        */
>       int (*get_region_info)(VFIODevice *vdev,
> -                           struct vfio_region_info *info);
> +                           struct vfio_region_info *info, int *fd);
>   
>       /**
>        * @get_irq_info
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index d0068086ae..29a8d72deb 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -226,6 +226,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>                                   struct vfio_region_info **info)
>   {
>       size_t argsz = sizeof(struct vfio_region_info);
> +    int fd = -1;
>       int ret;
>   
>       /* check cache */
> @@ -240,7 +241,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>   retry:
>       (*info)->argsz = argsz;
>   
> -    ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
> +    ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
>       if (ret != 0) {
>           g_free(*info);
>           *info = NULL;
> @@ -251,11 +252,19 @@ retry:
>           argsz = (*info)->argsz;
>           *info = g_realloc(*info, argsz);
>   
> +        if (fd != -1) {
> +            close(fd);
> +            fd = -1;
> +        }
> +
>           goto retry;
>       }
>   
>       /* fill cache */
>       vbasedev->reginfo[index] = *info;
> +    if (vbasedev->region_fds != NULL) {
> +        vbasedev->region_fds[index] = fd;
> +    }
>   
>       return 0;
>   }
> @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
>       vbasedev->io_ops = &vfio_device_io_ops_ioctl;
>       vbasedev->dev = dev;
>       vbasedev->fd = -1;
> +    vbasedev->use_region_fds = false;
>   
>       vbasedev->ram_block_discard_allowed = ram_discard;
>   }
> @@ -470,6 +480,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
>   
>       vbasedev->reginfo = g_new0(struct vfio_region_info *,
>                                  vbasedev->num_regions);
> +    if (vbasedev->use_region_fds) {
> +        vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
> +    }
>   }
>   
>   void vfio_device_unprepare(VFIODevice *vbasedev)
> @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>   
>       for (i = 0; i < vbasedev->num_regions; i++) {
>           g_free(vbasedev->reginfo[i]);
> +        if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> +            close(vbasedev->region_fds[i]);
> +        }
> +
>       }
> -    g_free(vbasedev->reginfo);
> -    vbasedev->reginfo = NULL;
> +
> +    g_clear_pointer(&vbasedev->reginfo, g_free);
> +    g_clear_pointer(&vbasedev->region_fds, g_free);
>   
>       QLIST_REMOVE(vbasedev, container_next);
>       QLIST_REMOVE(vbasedev, global_next);
> @@ -502,10 +520,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
>   }
>   
>   static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
> -                                          struct vfio_region_info *info)
> +                                          struct vfio_region_info *info,
> +                                          int *fd)
>   {
>       int ret;
>   
> +    *fd = -1;
> +
>       ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
>   
>       return ret < 0 ? -errno : ret;
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index 34752c3f65..cb172f2136 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
>   {
>       int i, ret, prot = 0;
>       char *name;
> +    int fd;
>   
>       if (!region->mem) {
>           return 0;
> @@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
>               goto no_mmap;
>           }
>   
> +        /* Use the per-region fd if set, or the shared fd. */
> +        fd = region->vbasedev->region_fds ?
> +             region->vbasedev->region_fds[region->nr] :
> +             region->vbasedev->fd,
> +

This feels a bit odd: if you're asking to map a VFIORegion then 
shouldn't that contain all the information required for the mapping, 
including the fd?

(goes and looks)

It appears that there is already a fd field in VFIORegion so is there a 
reason why we can't use this instead of adding region_fds to vbasedev?

>           map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
>           munmap(map_base, map_align - map_base);
>           munmap(map_align + region->mmaps[i].size,
>                  align - (map_align - map_base));
>   
>           region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
> -                                     MAP_SHARED | MAP_FIXED,
> -                                     region->vbasedev->fd,
> +                                     MAP_SHARED | MAP_FIXED, fd,
>                                        region->fd_offset +
>                                        region->mmaps[i].offset);
>           if (region->mmaps[i].mmap == MAP_FAILED) {


ATB,

Mark.
Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by John Levon 5 months, 1 week ago
On Tue, Jun 10, 2025 at 11:58:18AM +0100, Mark Cave-Ayland wrote:

> > +        /* Use the per-region fd if set, or the shared fd. */
> > +        fd = region->vbasedev->region_fds ?
> > +             region->vbasedev->region_fds[region->nr] :
> > +             region->vbasedev->fd,
> > +
> 
> This feels a bit odd: if you're asking to map a VFIORegion then shouldn't
> that contain all the information required for the mapping, including the fd?
> 
> (goes and looks)
> 
> It appears that there is already a fd field in VFIORegion so is there a
> reason why we can't use this instead of adding region_fds to vbasedev?

region->fd was in earlier patchsets, but dropped as requested by Cédric. So we
must look it up here.

regards
john
Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by Cédric Le Goater 5 months, 1 week ago
On 6/10/25 13:27, John Levon wrote:
> On Tue, Jun 10, 2025 at 11:58:18AM +0100, Mark Cave-Ayland wrote:
> 
>>> +        /* Use the per-region fd if set, or the shared fd. */
>>> +        fd = region->vbasedev->region_fds ?
>>> +             region->vbasedev->region_fds[region->nr] :
>>> +             region->vbasedev->fd,
>>> +
>>
>> This feels a bit odd: if you're asking to map a VFIORegion then shouldn't
>> that contain all the information required for the mapping, including the fd?
>>
>> (goes and looks)
>>
>> It appears that there is already a fd field in VFIORegion so is there a
>> reason why we can't use this instead of adding region_fds to vbasedev?
> 
> region->fd was in earlier patchsets, but dropped as requested by Cédric. So we
> must look it up here.

yes. It didn't seem necessary to store the same information under
multiple structs.

C.


Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by John Levon 5 months, 1 week ago
On Tue, Jun 10, 2025 at 01:57:23PM +0200, Cédric Le Goater wrote:

> On 6/10/25 13:27, John Levon wrote:
> > On Tue, Jun 10, 2025 at 11:58:18AM +0100, Mark Cave-Ayland wrote:
> > 
> > > > +        /* Use the per-region fd if set, or the shared fd. */
> > > > +        fd = region->vbasedev->region_fds ?
> > > > +             region->vbasedev->region_fds[region->nr] :
> > > > +             region->vbasedev->fd,
> > > > +
> > > 
> > > This feels a bit odd: if you're asking to map a VFIORegion then shouldn't
> > > that contain all the information required for the mapping, including the fd?
> > > 
> > > (goes and looks)
> > > 
> > > It appears that there is already a fd field in VFIORegion so is there a
> > > reason why we can't use this instead of adding region_fds to vbasedev?
> > 
> > region->fd was in earlier patchsets, but dropped as requested by Cédric. So we
> > must look it up here.
> 
> yes. It didn't seem necessary to store the same information under
> multiple structs.

And the reason we have ->region_fds at all rather than just storing directly in
region->fd is that vfio_device_get_region_info() doesn't take a VFIORegion
argument, as it's used in other contexts too.

regards
john
Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by Cédric Le Goater 5 months, 1 week ago
On 6/7/25 02:10, John Levon wrote:
> For vfio-user, each region has its own fd rather than sharing
> vbasedev's. Add the necessary plumbing to support this, and use the
> correct fd in vfio_region_mmap().
> 
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
>   include/hw/vfio/vfio-device.h |  7 +++++--
>   hw/vfio/device.c              | 29 +++++++++++++++++++++++++----
>   hw/vfio/region.c              |  9 +++++++--
>   3 files changed, 37 insertions(+), 8 deletions(-)
> 
> diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
> index 923f9cd116..5cb817fd6a 100644
> --- a/include/hw/vfio/vfio-device.h
> +++ b/include/hw/vfio/vfio-device.h
> @@ -66,6 +66,7 @@ typedef struct VFIODevice {
>       OnOffAuto enable_migration;
>       OnOffAuto migration_multifd_transfer;
>       bool migration_events;
> +    bool use_region_fds;
>       VFIODeviceOps *ops;
>       VFIODeviceIOOps *io_ops;
>       unsigned int num_irqs;
> @@ -84,6 +85,7 @@ typedef struct VFIODevice {
>       VFIOIOASHwpt *hwpt;
>       QLIST_ENTRY(VFIODevice) hwpt_next;
>       struct vfio_region_info **reginfo;
> +    int *region_fds;
>   } VFIODevice;
>   
>   struct VFIODeviceOps {
> @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
>       /**
>        * @get_region_info
>        *
> -     * Fill in @info with information on the region given by @info->index.
> +     * Fill in @info (and optionally @fd) with information on the region given
> +     * by @info->index.
>        */

Could you please update the whole documentation of the VFIODeviceIOOps
struct and document each parameter ?


>       int (*get_region_info)(VFIODevice *vdev,
> -                           struct vfio_region_info *info);
> +                           struct vfio_region_info *info, int *fd);
>   
>       /**
>        * @get_irq_info
> diff --git a/hw/vfio/device.c b/hw/vfio/device.c
> index d0068086ae..29a8d72deb 100644
> --- a/hw/vfio/device.c
> +++ b/hw/vfio/device.c
> @@ -226,6 +226,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>                                   struct vfio_region_info **info)
>   {
>       size_t argsz = sizeof(struct vfio_region_info);
> +    int fd = -1;
>       int ret;
>   
>       /* check cache */
> @@ -240,7 +241,7 @@ int vfio_device_get_region_info(VFIODevice *vbasedev, int index,
>   retry:
>       (*info)->argsz = argsz;
>   
> -    ret = vbasedev->io_ops->get_region_info(vbasedev, *info);
> +    ret = vbasedev->io_ops->get_region_info(vbasedev, *info, &fd);
>       if (ret != 0) {
>           g_free(*info);
>           *info = NULL;
> @@ -251,11 +252,19 @@ retry:
>           argsz = (*info)->argsz;
>           *info = g_realloc(*info, argsz);
>   
> +        if (fd != -1) {
> +            close(fd);
> +            fd = -1;
> +        }
> +
>           goto retry;
>       }
>   
>       /* fill cache */
>       vbasedev->reginfo[index] = *info;
> +    if (vbasedev->region_fds != NULL) {
> +        vbasedev->region_fds[index] = fd;
> +    }
>   
>       return 0;
>   }
> @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
>       vbasedev->io_ops = &vfio_device_io_ops_ioctl;
>       vbasedev->dev = dev;
>       vbasedev->fd = -1;
> +    vbasedev->use_region_fds = false;

why not extend vfio_device_init() with a 'region_fd_cache' bool
parameter instead ? and avoid the extra VFIODevice attribute.

>   
>       vbasedev->ram_block_discard_allowed = ram_discard;
>   }
> @@ -470,6 +480,9 @@ void vfio_device_prepare(VFIODevice *vbasedev, VFIOContainerBase *bcontainer,
>   
>       vbasedev->reginfo = g_new0(struct vfio_region_info *,
>                                  vbasedev->num_regions);
> +    if (vbasedev->use_region_fds) {
> +        vbasedev->region_fds = g_new0(int, vbasedev->num_regions);
> +    }
>   }
>   
>   void vfio_device_unprepare(VFIODevice *vbasedev)
> @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
>   
>       for (i = 0; i < vbasedev->num_regions; i++) {
>           g_free(vbasedev->reginfo[i]);
> +        if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> +            close(vbasedev->region_fds[i]);
> +        }
> +

Adding a small helper to retrieve the region fd from the cache,
would hide some of the implementation details here and below in
vfio_region_mmap()


>       }
> -    g_free(vbasedev->reginfo);
> -    vbasedev->reginfo = NULL;
> +
> +    g_clear_pointer(&vbasedev->reginfo, g_free);

unrelated change.


Thanks,

C.



> +    g_clear_pointer(&vbasedev->region_fds, g_free);
>   
>       QLIST_REMOVE(vbasedev, container_next);
>       QLIST_REMOVE(vbasedev, global_next);
> @@ -502,10 +520,13 @@ static int vfio_device_io_device_feature(VFIODevice *vbasedev,
>   }
>   
>   static int vfio_device_io_get_region_info(VFIODevice *vbasedev,
> -                                          struct vfio_region_info *info)
> +                                          struct vfio_region_info *info,
> +                                          int *fd)
>   {
>       int ret;
>   
> +    *fd = -1;
> +
>       ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
>   
>       return ret < 0 ? -errno : ret;
> diff --git a/hw/vfio/region.c b/hw/vfio/region.c
> index 34752c3f65..cb172f2136 100644
> --- a/hw/vfio/region.c
> +++ b/hw/vfio/region.c
> @@ -241,6 +241,7 @@ int vfio_region_mmap(VFIORegion *region)
>   {
>       int i, ret, prot = 0;
>       char *name;
> +    int fd;
>   
>       if (!region->mem) {
>           return 0;
> @@ -271,14 +272,18 @@ int vfio_region_mmap(VFIORegion *region)
>               goto no_mmap;
>           }
>   
> +        /* Use the per-region fd if set, or the shared fd. */
> +        fd = region->vbasedev->region_fds ?
> +             region->vbasedev->region_fds[region->nr] :
> +             region->vbasedev->fd,
> +
>           map_align = (void *)ROUND_UP((uintptr_t)map_base, (uintptr_t)align);
>           munmap(map_base, map_align - map_base);
>           munmap(map_align + region->mmaps[i].size,
>                  align - (map_align - map_base));
>   
>           region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
> -                                     MAP_SHARED | MAP_FIXED,
> -                                     region->vbasedev->fd,
> +                                     MAP_SHARED | MAP_FIXED, fd,
>                                        region->fd_offset +
>                                        region->mmaps[i].offset);
>           if (region->mmaps[i].mmap == MAP_FAILED) {
Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by John Levon 5 months, 1 week ago
On Tue, Jun 10, 2025 at 09:42:41AM +0200, Cédric Le Goater wrote:

> > @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
> >       for (i = 0; i < vbasedev->num_regions; i++) {
> >           g_free(vbasedev->reginfo[i]);
> > +        if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> > +            close(vbasedev->region_fds[i]);
> > +        }
> 
> Adding a small helper to retrieve the region fd from the cache,
> would hide some of the implementation details here and below in
> vfio_region_mmap()

I've added vfio_device_get_region_fd() - used by vfio_region_mmap(); now the
existence of ->region_fds is private to device.c, so I think that's better.

regards
john
Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by John Levon 5 months, 1 week ago
On Tue, Jun 10, 2025 at 09:42:41AM +0200, Cédric Le Goater wrote:

> > @@ -478,9 +491,14 @@ void vfio_device_unprepare(VFIODevice *vbasedev)
> >       for (i = 0; i < vbasedev->num_regions; i++) {
> >           g_free(vbasedev->reginfo[i]);
> > +        if (vbasedev->region_fds != NULL && vbasedev->region_fds[i] != -1) {
> > +            close(vbasedev->region_fds[i]);
> > +        }
> > +
> 
> Adding a small helper to retrieve the region fd from the cache,
> would hide some of the implementation details here and below in
> vfio_region_mmap()

I can add a region_get_fd() helper, but I can't use it here - we should only
close the fd if it's a per-region fd. I'll make this call a region_cleanup() or
similar.

regards
john
Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by John Levon 5 months, 1 week ago
On Tue, Jun 10, 2025 at 09:42:41AM +0200, Cédric Le Goater wrote:

> On 6/7/25 02:10, John Levon wrote:
> > For vfio-user, each region has its own fd rather than sharing
> > vbasedev's. Add the necessary plumbing to support this, and use the
> > correct fd in vfio_region_mmap().
> > 
> > @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
> >       /**
> >        * @get_region_info
> >        *
> > -     * Fill in @info with information on the region given by @info->index.
> > +     * Fill in @info (and optionally @fd) with information on the region given
> > +     * by @info->index.
> >        */
> 
> Could you please update the whole documentation of the VFIODeviceIOOps
> struct and document each parameter ?

Sorry, not sure what you're asking for. This struct was fully documented in
38bf025d ("vfio: add device IO ops vector")
and its subsequent patches.

> > @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
> >       vbasedev->io_ops = &vfio_device_io_ops_ioctl;
> >       vbasedev->dev = dev;
> >       vbasedev->fd = -1;
> > +    vbasedev->use_region_fds = false;
> 
> why not extend vfio_device_init() with a 'region_fd_cache' bool
> parameter instead ?

I could do, but you previously asked me not to add an "io_ops" parameter to this
function and instead directly change it here - why is this parameter different?

> and avoid the extra VFIODevice attribute.

I don't get this - we still need to record the boolean in the vbasedev.

regards
john
Re: [PATCH v3 03/23] vfio: add per-region fd support
Posted by Cédric Le Goater 5 months, 1 week ago
On 6/10/25 14:34, John Levon wrote:
> On Tue, Jun 10, 2025 at 09:42:41AM +0200, Cédric Le Goater wrote:
> 
>> On 6/7/25 02:10, John Levon wrote:
>>> For vfio-user, each region has its own fd rather than sharing
>>> vbasedev's. Add the necessary plumbing to support this, and use the
>>> correct fd in vfio_region_mmap().
>>>
>>> @@ -172,10 +174,11 @@ struct VFIODeviceIOOps {
>>>        /**
>>>         * @get_region_info
>>>         *
>>> -     * Fill in @info with information on the region given by @info->index.
>>> +     * Fill in @info (and optionally @fd) with information on the region given
>>> +     * by @info->index.
>>>         */
>>
>> Could you please update the whole documentation of the VFIODeviceIOOps
>> struct and document each parameter ?
> 
> Sorry, not sure what you're asking for. This struct was fully documented in
> 38bf025d ("vfio: add device IO ops vector")
> and its subsequent patches.

yes. See the formatting of VFIODeviceOps :

     /**
      * @vfio_save_config
      *
      * Save device config state
      *
      * @vdev: #VFIODevice for which to save the config
      * @f: #QEMUFile where to send the data
      * @errp: pointer to Error*, to store an error if it happens.
      *
      * Returns zero to indicate success and negative for error
      */
     int (*vfio_save_config)(VFIODevice *vdev, QEMUFile *f, Error **errp);


No big deal. It can come later.


>>> @@ -360,6 +369,7 @@ void vfio_device_init(VFIODevice *vbasedev, int type, VFIODeviceOps *ops,
>>>        vbasedev->io_ops = &vfio_device_io_ops_ioctl;
>>>        vbasedev->dev = dev;
>>>        vbasedev->fd = -1;
>>> +    vbasedev->use_region_fds = false;
>>
>> why not extend vfio_device_init() with a 'region_fd_cache' bool
>> parameter instead ?
> 
> I could do, but you previously asked me not to add an "io_ops" parameter to this
> function and instead directly change it here - why is this parameter different?
Because I thought we could allocate vbasedev->region_fds[] directly under
vfio_device_init().
  >> and avoid the extra VFIODevice attribute.
> 
> I don't get this - we still need to record the boolean in the vbasedev.

but we can't because we don't have the vbasedev->num_regions value yet.
So forget this proposal.

That said, there are a few contortions around the instance_init()
handler, vfio_device_init() and the realize() handler that I find
confusing. I will see if it can be improved when the code is stable
again.

Thanks,

C.