[PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices

Vivek Kasireddy posted 9 patches 3 weeks ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, "Alex Bennée" <alex.bennee@linaro.org>, Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>, Dmitry Osipenko <dmitry.osipenko@collabora.com>, "Michael S. Tsirkin" <mst@redhat.com>, Alex Williamson <alex@shazbot.org>, "Cédric Le Goater" <clg@redhat.com>
[PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Vivek Kasireddy 3 weeks ago
In addition to memfd, a blob resource can also have its backing
storage in a VFIO device region. Therefore, we first need to figure
out whether the blob is backed by a VFIO device region or a memfd
before we can call the right API to create a dmabuf fd for it.

So, we first call virtio_gpu_create_udmabuf(), which calls
ram_block_is_memfd_backed() to check whether the blob's backing
storage is located in a memfd. If it is not, we call vfio_create_dmabuf(),
which identifies the right VFIO device and eventually invokes
vfio_device_create_dmabuf_fd() to create the dmabuf fd.
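
The resulting dispatch in virtio_gpu_init_dmabuf() then looks roughly
like this (simplified sketch; error logging and the Kconfig guard are
omitted):

    virtio_gpu_create_udmabuf(res);    /* only succeeds for memfd-backed blobs */
    if (res->dmabuf_fd < 0) {
        vfio_create_dmabuf(res);       /* otherwise try the VFIO dmabuf exporter */
    }
    if (res->dmabuf_fd < 0) {
        return;                        /* neither backend could export the blob */
    }
    virtio_gpu_remap_dmabuf(res);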

Note that in virtio_gpu_remap_dmabuf(), we first test whether the
VFIO dmabuf exporter supports mmap. If it does not, we use the VFIO
device fd directly to create the CPU mapping.
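
That fallback amounts to roughly the following (simplified):

    res->remapped = mmap(NULL, res->blob_size, PROT_READ,
                         MAP_SHARED, res->dmabuf_fd, 0);
    if (res->remapped == MAP_FAILED) {
        /* exporter has no mmap support; map the VFIO region(s) directly */
        res->remapped = vfio_dmabuf_mmap(res);
    }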

While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
from virtio_gpu_create_udmabuf().

Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Cc: Alex Williamson <alex@shazbot.org>
Cc: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 hw/display/Kconfig             |   5 ++
 hw/display/virtio-gpu-dmabuf.c | 122 ++++++++++++++++++++++++++++++---
 2 files changed, 119 insertions(+), 8 deletions(-)

diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index 1e95ab28ef..0d090f25f5 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -106,6 +106,11 @@ config VIRTIO_VGA
     depends on VIRTIO_PCI
     select VGA
 
+config VIRTIO_GPU_VFIO_BLOB
+    bool
+    default y
+    depends on VFIO
+
 config VHOST_USER_GPU
     bool
     default y
diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-dmabuf.c
index 258c48d31b..d121a2c9a7 100644
--- a/hw/display/virtio-gpu-dmabuf.c
+++ b/hw/display/virtio-gpu-dmabuf.c
@@ -18,6 +18,7 @@
 #include "ui/console.h"
 #include "hw/virtio/virtio-gpu.h"
 #include "hw/virtio/virtio-gpu-pixman.h"
+#include "hw/vfio/vfio-device.h"
 #include "trace.h"
 #include "system/ramblock.h"
 #include "system/hostmem.h"
@@ -40,10 +41,42 @@ static bool ram_block_is_memfd_backed(RAMBlock *rb)
     return false;
 }
 
+static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
+{
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+    VFIODevice *vbasedev;
+    RAMBlock *first_rb;
+    ram_addr_t offset;
+
+    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
+    if (!first_rb) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Could not find ramblock\n", __func__);
+        return;
+    }
+
+    vbasedev = vfio_device_lookup(first_rb->mr);
+    if (!vbasedev) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: No VFIO device found to create dmabuf from\n",
+                      __func__);
+        return;
+    }
+
+    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
+                                                  res->iov, res->iov_cnt);
+    if (res->dmabuf_fd < 0) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
+                      __func__, strerror(errno));
+    }
+#endif
+}
+
 static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
 {
     struct udmabuf_create_list *list;
-    RAMBlock *rb;
+    RAMBlock *rb, *first_rb;
     ram_addr_t offset;
     int udmabuf, i;
 
@@ -52,15 +85,17 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
         return;
     }
 
+    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
+    if (!ram_block_is_memfd_backed(first_rb)) {
+        return;
+    }
+
     list = g_malloc0(sizeof(struct udmabuf_create_list) +
                      sizeof(struct udmabuf_create_item) * res->iov_cnt);
 
     for (i = 0; i < res->iov_cnt; i++) {
-        rcu_read_lock();
         rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
-        rcu_read_unlock();
-
-        if (!rb || rb->fd < 0) {
+        if (rb != first_rb) {
             g_free(list);
             return;
         }
@@ -81,11 +116,77 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
     g_free(list);
 }
 
+static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)
+{
+    struct vfio_region_info *info = NULL;
+    VFIODevice *vbasedev = NULL;
+    ram_addr_t offset, len = 0;
+    RAMBlock *first_rb, *rb;
+    void *map, *submap;
+    int i, ret = -1;
+
+    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
+    if (!first_rb) {
+        return MAP_FAILED;
+    }
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+    vbasedev = vfio_device_lookup(first_rb->mr);
+#endif
+    if (!vbasedev) {
+        return MAP_FAILED;
+    }
+
+    /*
+     * We first reserve a contiguous chunk of address space for the entire
+     * dmabuf, then replace it with smaller mappings that correspond to the
+     * individual segments of the dmabuf.
+     */
+    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED, vbasedev->fd, 0);
+    if (map == MAP_FAILED) {
+        return map;
+    }
+
+    for (i = 0; i < res->iov_cnt; i++) {
+        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
+        if (rb != first_rb) {
+            goto err;
+        }
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+        ret = vfio_get_region_index_from_mr(rb->mr);
+        if (ret < 0) {
+            goto err;
+        }
+
+        ret = vfio_device_get_region_info(vbasedev, ret, &info);
+#endif
+        if (ret < 0 || !info) {
+            goto err;
+        }
+
+        submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
+                      MAP_SHARED | MAP_FIXED, vbasedev->fd,
+                      info->offset + offset);
+        if (submap == MAP_FAILED) {
+            goto err;
+        }
+
+        len += res->iov[i].iov_len;
+    }
+    return map;
+err:
+    munmap(map, res->blob_size);
+    return MAP_FAILED;
+}
+
 static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource *res)
 {
     res->remapped = mmap(NULL, res->blob_size, PROT_READ,
                          MAP_SHARED, res->dmabuf_fd, 0);
     if (res->remapped == MAP_FAILED) {
+        res->remapped = vfio_dmabuf_mmap(res);
+        if (res->remapped != MAP_FAILED) {
+            return;
+        }
         warn_report("%s: dmabuf mmap failed: %s", __func__,
                     strerror(errno));
         res->remapped = NULL;
@@ -146,6 +247,13 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
     } else {
         virtio_gpu_create_udmabuf(res);
         if (res->dmabuf_fd < 0) {
+            vfio_create_dmabuf(res);
+        }
+
+        if (res->dmabuf_fd < 0) {
+            qemu_log_mask(LOG_GUEST_ERROR,
+                          "%s: memory region cannot be used to create dmabuf\n",
+                          __func__);
             return;
         }
         virtio_gpu_remap_dmabuf(res);
@@ -160,9 +268,7 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
 
 void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
 {
-    if (res->remapped) {
-        virtio_gpu_destroy_dmabuf(res);
-    }
+    virtio_gpu_destroy_dmabuf(res);
 }
 
 static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)
-- 
2.50.1


Re: [PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Akihiko Odaki 3 weeks ago
On 2025/11/22 15:46, Vivek Kasireddy wrote:
> In addition to memfd, a blob resource can also have its backing
> storage in a VFIO device region. Therefore, we first need to figure
> out if the blob is backed by a VFIO device region or a memfd before
> we can call the right API to get a dmabuf fd created.
> 
> So, we first call virtio_gpu_create_udmabuf() which further calls
> ram_block_is_memfd_backed() to check if the blob's backing storage
> is located in a memfd or not. If it is not, we call vfio_create_dmabuf()
> which identifies the right VFIO device and eventually invokes the
> API vfio_device_create_dmabuf_fd() to have a dmabuf fd created.
> 
> Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> use the VFIO device fd directly to create the CPU mapping.
> 
> While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> from virtio_gpu_create_udmabuf().
> 
> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> Cc: Alex Bennée <alex.bennee@linaro.org>
> Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> Cc: Alex Williamson <alex@shazbot.org>
> Cc: Cédric Le Goater <clg@redhat.com>
> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> ---
>   hw/display/Kconfig             |   5 ++
>   hw/display/virtio-gpu-dmabuf.c | 122 ++++++++++++++++++++++++++++++---
>   2 files changed, 119 insertions(+), 8 deletions(-)
> 
> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> index 1e95ab28ef..0d090f25f5 100644
> --- a/hw/display/Kconfig
> +++ b/hw/display/Kconfig
> @@ -106,6 +106,11 @@ config VIRTIO_VGA
>       depends on VIRTIO_PCI
>       select VGA
>   
> +config VIRTIO_GPU_VFIO_BLOB
> +    bool
> +    default y
> +    depends on VFIO
> +
>   config VHOST_USER_GPU
>       bool
>       default y
> diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-dmabuf.c
> index 258c48d31b..d121a2c9a7 100644
> --- a/hw/display/virtio-gpu-dmabuf.c
> +++ b/hw/display/virtio-gpu-dmabuf.c
> @@ -18,6 +18,7 @@
>   #include "ui/console.h"
>   #include "hw/virtio/virtio-gpu.h"
>   #include "hw/virtio/virtio-gpu-pixman.h"
> +#include "hw/vfio/vfio-device.h"
>   #include "trace.h"
>   #include "system/ramblock.h"
>   #include "system/hostmem.h"
> @@ -40,10 +41,42 @@ static bool ram_block_is_memfd_backed(RAMBlock *rb)
>       return false;
>   }
>   
> +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
> +{
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +    VFIODevice *vbasedev;
> +    RAMBlock *first_rb;
> +    ram_addr_t offset;
> +
> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> +    if (!first_rb) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: Could not find ramblock\n", __func__);
> +        return;
> +    }
> +
> +    vbasedev = vfio_device_lookup(first_rb->mr);
> +    if (!vbasedev) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: No VFIO device found to create dmabuf from\n",
> +                      __func__);
> +        return;
> +    }
> +
> +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> +                                                  res->iov, res->iov_cnt);
> +    if (res->dmabuf_fd < 0) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> +                      __func__, strerror(errno));
> +    }
> +#endif
> +}
> +
>   static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>   {
>       struct udmabuf_create_list *list;
> -    RAMBlock *rb;
> +    RAMBlock *rb, *first_rb;
>       ram_addr_t offset;
>       int udmabuf, i;
>   
> @@ -52,15 +85,17 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>           return;
>       }
>   
> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> +    if (!ram_block_is_memfd_backed(first_rb)) {
> +        return;
> +    }
> +

We had an extensive discussion but I still don't understand the benefit 
of this change, while I see that it adds complexity by making another call to 
qemu_ram_block_from_host() and imposing an extra restriction that all 
elements need to belong to one RAMBlock.

If anyone else has an opinion on this, I'd like to hear it.

Regards,
Akihiko Odaki

>       list = g_malloc0(sizeof(struct udmabuf_create_list) +
>                        sizeof(struct udmabuf_create_item) * res->iov_cnt);
>   
>       for (i = 0; i < res->iov_cnt; i++) {
> -        rcu_read_lock();
>           rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> -        rcu_read_unlock();
> -
> -        if (!rb || rb->fd < 0) {
> +        if (rb != first_rb) {
>               g_free(list);
>               return;
>           }
> @@ -81,11 +116,77 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>       g_free(list);
>   }
>   
> +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)
> +{
> +    struct vfio_region_info *info = NULL;
> +    VFIODevice *vbasedev = NULL;
> +    ram_addr_t offset, len = 0;
> +    RAMBlock *first_rb, *rb;
> +    void *map, *submap;
> +    int i, ret = -1;
> +
> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> +    if (!first_rb) {
> +        return MAP_FAILED;
> +    }
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +    vbasedev = vfio_device_lookup(first_rb->mr);
> +#endif
> +    if (!vbasedev) {
> +        return MAP_FAILED;
> +    }
> +
> +    /*
> +     * We first reserve a contiguous chunk of address space for the entire
> +     * dmabuf, then replace it with smaller mappings that correspond to the
> +     * individual segments of the dmabuf.
> +     */
> +    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED, vbasedev->fd, 0);
> +    if (map == MAP_FAILED) {
> +        return map;
> +    }
> +
> +    for (i = 0; i < res->iov_cnt; i++) {
> +        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> +        if (rb != first_rb) {
> +            goto err;
> +        }
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +        ret = vfio_get_region_index_from_mr(rb->mr);
> +        if (ret < 0) {
> +            goto err;
> +        }
> +
> +        ret = vfio_device_get_region_info(vbasedev, ret, &info);
> +#endif
> +        if (ret < 0 || !info) {
> +            goto err;
> +        }
> +
> +        submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
> +                      MAP_SHARED | MAP_FIXED, vbasedev->fd,
> +                      info->offset + offset);
> +        if (submap == MAP_FAILED) {
> +            goto err;
> +        }
> +
> +        len += res->iov[i].iov_len;
> +    }
> +    return map;
> +err:
> +    munmap(map, res->blob_size);
> +    return MAP_FAILED;
> +}
> +
>   static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource *res)
>   {
>       res->remapped = mmap(NULL, res->blob_size, PROT_READ,
>                            MAP_SHARED, res->dmabuf_fd, 0);
>       if (res->remapped == MAP_FAILED) {
> +        res->remapped = vfio_dmabuf_mmap(res);
> +        if (res->remapped != MAP_FAILED) {
> +            return;
> +        }
>           warn_report("%s: dmabuf mmap failed: %s", __func__,
>                       strerror(errno));
>           res->remapped = NULL;
> @@ -146,6 +247,13 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
>       } else {
>           virtio_gpu_create_udmabuf(res);
>           if (res->dmabuf_fd < 0) {
> +            vfio_create_dmabuf(res);
> +        }
> +
> +        if (res->dmabuf_fd < 0) {
> +            qemu_log_mask(LOG_GUEST_ERROR,
> +                          "%s: memory region cannot be used to create dmabuf\n",
> +                          __func__);
>               return;
>           }
>           virtio_gpu_remap_dmabuf(res);
> @@ -160,9 +268,7 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
>   
>   void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
>   {
> -    if (res->remapped) {
> -        virtio_gpu_destroy_dmabuf(res);
> -    }
> +    virtio_gpu_destroy_dmabuf(res);
>   }
>   
>   static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)


RE: [PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Kasireddy, Vivek 3 weeks ago
Hi Akihiko,

> Subject: Re: [PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs
> associated with VFIO devices
> 
> On 2025/11/22 15:46, Vivek Kasireddy wrote:
> > In addition to memfd, a blob resource can also have its backing
> > storage in a VFIO device region. Therefore, we first need to figure
> > out if the blob is backed by a VFIO device region or a memfd before
> > we can call the right API to get a dmabuf fd created.
> >
> > So, we first call virtio_gpu_create_udmabuf() which further calls
> > ram_block_is_memfd_backed() to check if the blob's backing storage
> > is located in a memfd or not. If it is not, we call vfio_create_dmabuf()
> > which identifies the right VFIO device and eventually invokes the
> > API vfio_device_create_dmabuf_fd() to have a dmabuf fd created.
> >
> > Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> > the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> > use the VFIO device fd directly to create the CPU mapping.
> >
> > While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> > from virtio_gpu_create_udmabuf().
> >
> > Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> > Cc: Alex Bennée <alex.bennee@linaro.org>
> > Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> > Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> > Cc: Alex Williamson <alex@shazbot.org>
> > Cc: Cédric Le Goater <clg@redhat.com>
> > Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> > ---
> >   hw/display/Kconfig             |   5 ++
> >   hw/display/virtio-gpu-dmabuf.c | 122
> ++++++++++++++++++++++++++++++---
> >   2 files changed, 119 insertions(+), 8 deletions(-)
> >
> > diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> > index 1e95ab28ef..0d090f25f5 100644
> > --- a/hw/display/Kconfig
> > +++ b/hw/display/Kconfig
> > @@ -106,6 +106,11 @@ config VIRTIO_VGA
> >       depends on VIRTIO_PCI
> >       select VGA
> >
> > +config VIRTIO_GPU_VFIO_BLOB
> > +    bool
> > +    default y
> > +    depends on VFIO
> > +
> >   config VHOST_USER_GPU
> >       bool
> >       default y
> > diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-
> dmabuf.c
> > index 258c48d31b..d121a2c9a7 100644
> > --- a/hw/display/virtio-gpu-dmabuf.c
> > +++ b/hw/display/virtio-gpu-dmabuf.c
> > @@ -18,6 +18,7 @@
> >   #include "ui/console.h"
> >   #include "hw/virtio/virtio-gpu.h"
> >   #include "hw/virtio/virtio-gpu-pixman.h"
> > +#include "hw/vfio/vfio-device.h"
> >   #include "trace.h"
> >   #include "system/ramblock.h"
> >   #include "system/hostmem.h"
> > @@ -40,10 +41,42 @@ static bool ram_block_is_memfd_backed(RAMBlock
> *rb)
> >       return false;
> >   }
> >
> > +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
> > +{
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +    VFIODevice *vbasedev;
> > +    RAMBlock *first_rb;
> > +    ram_addr_t offset;
> > +
> > +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > +    if (!first_rb) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: Could not find ramblock\n", __func__);
> > +        return;
> > +    }
> > +
> > +    vbasedev = vfio_device_lookup(first_rb->mr);
> > +    if (!vbasedev) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: No VFIO device found to create dmabuf from\n",
> > +                      __func__);
> > +        return;
> > +    }
> > +
> > +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> > +                                                  res->iov, res->iov_cnt);
> > +    if (res->dmabuf_fd < 0) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> > +                      __func__, strerror(errno));
> > +    }
> > +#endif
> > +}
> > +
> >   static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource
> *res)
> >   {
> >       struct udmabuf_create_list *list;
> > -    RAMBlock *rb;
> > +    RAMBlock *rb, *first_rb;
> >       ram_addr_t offset;
> >       int udmabuf, i;
> >
> > @@ -52,15 +85,17 @@ static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> >           return;
> >       }
> >
> > +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > +    if (!ram_block_is_memfd_backed(first_rb)) {
> > +        return;
> > +    }
> > +
> 
> We had an extensive discussion but I still don't understand the benefit
> of this change while I see it adds complexity by having another call of
I'll get rid of the extra call to qemu_ram_block_from_host() in the next version.

> qemu_ram_block_from_host() and imposing an extra restriction that all
> elements need to belong to one RAMBlock.
I thought you suggested that we need to ensure all entries are validated
and associated with the same memory region? As
virtio_gpu_create_udmabuf() was not doing that, I thought this
change/restriction needed to be added.

And, since calling ram_block_is_memfd_backed() for each entry would
incur extra overhead (as there can be thousands of entries and fcntl needs
to check with the kernel), I figured calling it once for the first ram block
and comparing all the other ram blocks against it made sense.
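
In other words, something along these lines (simplified from the patch):

    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
    if (!ram_block_is_memfd_backed(first_rb)) {    /* single fcntl-based check */
        return;
    }

    for (i = 0; i < res->iov_cnt; i++) {
        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
        if (rb != first_rb) {                      /* cheap pointer comparison */
            return;
        }
    }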

Also, rethinking this whole situation, I don't think we should try to create
a dmabuf for a buffer that might have mixed/different memory regions or
memfds (as this is most likely an invalid scenario that could lead to undefined
behavior), so this change is meant to prevent such a scenario.

Thanks,
Vivek

> 
> If anyone else have some opinion on this, I'd like to hear.
> 
> Regards,
> Akihiko Odaki
> 
> >       list = g_malloc0(sizeof(struct udmabuf_create_list) +
> >                        sizeof(struct udmabuf_create_item) * res->iov_cnt);
> >
> >       for (i = 0; i < res->iov_cnt; i++) {
> > -        rcu_read_lock();
> >           rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> > -        rcu_read_unlock();
> > -
> > -        if (!rb || rb->fd < 0) {
> > +        if (rb != first_rb) {
> >               g_free(list);
> >               return;
> >           }
> > @@ -81,11 +116,77 @@ static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> >       g_free(list);
> >   }
> >
> > +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)
> > +{
> > +    struct vfio_region_info *info = NULL;
> > +    VFIODevice *vbasedev = NULL;
> > +    ram_addr_t offset, len = 0;
> > +    RAMBlock *first_rb, *rb;
> > +    void *map, *submap;
> > +    int i, ret = -1;
> > +
> > +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > +    if (!first_rb) {
> > +        return MAP_FAILED;
> > +    }
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +    vbasedev = vfio_device_lookup(first_rb->mr);
> > +#endif
> > +    if (!vbasedev) {
> > +        return MAP_FAILED;
> > +    }
> > +
> > +    /*
> > +     * We first reserve a contiguous chunk of address space for the entire
> > +     * dmabuf, then replace it with smaller mappings that correspond to the
> > +     * individual segments of the dmabuf.
> > +     */
> > +    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED,
> vbasedev->fd, 0);
> > +    if (map == MAP_FAILED) {
> > +        return map;
> > +    }
> > +
> > +    for (i = 0; i < res->iov_cnt; i++) {
> > +        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> > +        if (rb != first_rb) {
> > +            goto err;
> > +        }
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +        ret = vfio_get_region_index_from_mr(rb->mr);
> > +        if (ret < 0) {
> > +            goto err;
> > +        }
> > +
> > +        ret = vfio_device_get_region_info(vbasedev, ret, &info);
> > +#endif
> > +        if (ret < 0 || !info) {
> > +            goto err;
> > +        }
> > +
> > +        submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
> > +                      MAP_SHARED | MAP_FIXED, vbasedev->fd,
> > +                      info->offset + offset);
> > +        if (submap == MAP_FAILED) {
> > +            goto err;
> > +        }
> > +
> > +        len += res->iov[i].iov_len;
> > +    }
> > +    return map;
> > +err:
> > +    munmap(map, res->blob_size);
> > +    return MAP_FAILED;
> > +}
> > +
> >   static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource
> *res)
> >   {
> >       res->remapped = mmap(NULL, res->blob_size, PROT_READ,
> >                            MAP_SHARED, res->dmabuf_fd, 0);
> >       if (res->remapped == MAP_FAILED) {
> > +        res->remapped = vfio_dmabuf_mmap(res);
> > +        if (res->remapped != MAP_FAILED) {
> > +            return;
> > +        }
> >           warn_report("%s: dmabuf mmap failed: %s", __func__,
> >                       strerror(errno));
> >           res->remapped = NULL;
> > @@ -146,6 +247,13 @@ void virtio_gpu_init_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >       } else {
> >           virtio_gpu_create_udmabuf(res);
> >           if (res->dmabuf_fd < 0) {
> > +            vfio_create_dmabuf(res);
> > +        }
> > +
> > +        if (res->dmabuf_fd < 0) {
> > +            qemu_log_mask(LOG_GUEST_ERROR,
> > +                          "%s: memory region cannot be used to create dmabuf\n",
> > +                          __func__);
> >               return;
> >           }
> >           virtio_gpu_remap_dmabuf(res);
> > @@ -160,9 +268,7 @@ void virtio_gpu_init_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >
> >   void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
> >   {
> > -    if (res->remapped) {
> > -        virtio_gpu_destroy_dmabuf(res);
> > -    }
> > +    virtio_gpu_destroy_dmabuf(res);
> >   }
> >
> >   static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)

Re: [PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Akihiko Odaki 2 weeks, 6 days ago
On 2025/11/23 15:17, Kasireddy, Vivek wrote:
> Hi Akihiko,
> 
>> Subject: Re: [PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs
>> associated with VFIO devices
>>
>> On 2025/11/22 15:46, Vivek Kasireddy wrote:
>>> In addition to memfd, a blob resource can also have its backing
>>> storage in a VFIO device region. Therefore, we first need to figure
>>> out if the blob is backed by a VFIO device region or a memfd before
>>> we can call the right API to get a dmabuf fd created.
>>>
>>> So, we first call virtio_gpu_create_udmabuf() which further calls
>>> ram_block_is_memfd_backed() to check if the blob's backing storage
>>> is located in a memfd or not. If it is not, we call vfio_create_dmabuf()
>>> which identifies the right VFIO device and eventually invokes the
>>> API vfio_device_create_dmabuf_fd() to have a dmabuf fd created.
>>>
>>> Note that in virtio_gpu_remap_dmabuf(), we first try to test if
>>> the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
>>> use the VFIO device fd directly to create the CPU mapping.
>>>
>>> While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
>>> from virtio_gpu_create_udmabuf().
>>>
>>> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
>>> Cc: Alex Bennée <alex.bennee@linaro.org>
>>> Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
>>> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>>> Cc: Alex Williamson <alex@shazbot.org>
>>> Cc: Cédric Le Goater <clg@redhat.com>
>>> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
>>> ---
>>>    hw/display/Kconfig             |   5 ++
>>>    hw/display/virtio-gpu-dmabuf.c | 122
>> ++++++++++++++++++++++++++++++---
>>>    2 files changed, 119 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
>>> index 1e95ab28ef..0d090f25f5 100644
>>> --- a/hw/display/Kconfig
>>> +++ b/hw/display/Kconfig
>>> @@ -106,6 +106,11 @@ config VIRTIO_VGA
>>>        depends on VIRTIO_PCI
>>>        select VGA
>>>
>>> +config VIRTIO_GPU_VFIO_BLOB
>>> +    bool
>>> +    default y
>>> +    depends on VFIO
>>> +
>>>    config VHOST_USER_GPU
>>>        bool
>>>        default y
>>> diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-
>> dmabuf.c
>>> index 258c48d31b..d121a2c9a7 100644
>>> --- a/hw/display/virtio-gpu-dmabuf.c
>>> +++ b/hw/display/virtio-gpu-dmabuf.c
>>> @@ -18,6 +18,7 @@
>>>    #include "ui/console.h"
>>>    #include "hw/virtio/virtio-gpu.h"
>>>    #include "hw/virtio/virtio-gpu-pixman.h"
>>> +#include "hw/vfio/vfio-device.h"
>>>    #include "trace.h"
>>>    #include "system/ramblock.h"
>>>    #include "system/hostmem.h"
>>> @@ -40,10 +41,42 @@ static bool ram_block_is_memfd_backed(RAMBlock
>> *rb)
>>>        return false;
>>>    }
>>>
>>> +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
>>> +{
>>> +#if defined(VIRTIO_GPU_VFIO_BLOB)
>>> +    VFIODevice *vbasedev;
>>> +    RAMBlock *first_rb;
>>> +    ram_addr_t offset;
>>> +
>>> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
>> &offset);
>>> +    if (!first_rb) {
>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>> +                      "%s: Could not find ramblock\n", __func__);
>>> +        return;
>>> +    }
>>> +
>>> +    vbasedev = vfio_device_lookup(first_rb->mr);
>>> +    if (!vbasedev) {
>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>> +                      "%s: No VFIO device found to create dmabuf from\n",
>>> +                      __func__);
>>> +        return;
>>> +    }
>>> +
>>> +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
>>> +                                                  res->iov, res->iov_cnt);
>>> +    if (res->dmabuf_fd < 0) {
>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>> +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
>>> +                      __func__, strerror(errno));
>>> +    }
>>> +#endif
>>> +}
>>> +
>>>    static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource
>> *res)
>>>    {
>>>        struct udmabuf_create_list *list;
>>> -    RAMBlock *rb;
>>> +    RAMBlock *rb, *first_rb;
>>>        ram_addr_t offset;
>>>        int udmabuf, i;
>>>
>>> @@ -52,15 +85,17 @@ static void virtio_gpu_create_udmabuf(struct
>> virtio_gpu_simple_resource *res)
>>>            return;
>>>        }
>>>
>>> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
>> &offset);
>>> +    if (!ram_block_is_memfd_backed(first_rb)) {
>>> +        return;
>>> +    }
>>> +
>>
>> We had an extensive discussion but I still don't understand the benefit
>> of this change while I see it adds complexity by having another call of
> I'll get rid of the extra call to qemu_ram_block_from_host() in the next version.

It is possible to reduce the number of qemu_ram_block_from_host() 
calls, but the code complexity remains unless you keep the original 
code.

> 
>> qemu_ram_block_from_host() and imposing an extra restriction that all
>> elements need to belong to one RAMBlock.
> I thought you suggested that we need to ensure all entries (need to be
> validated and) are associated with the same memory region? As
> virtio_gpu_create_udmabuf() was not doing that, I thought this
> change/restriction needs to be added.

The first comment I raised on "[PATCH v2 09/10] virtio-gpu-dmabuf: 
Introduce qemu_iovec_same_memory_regions()" was that it complicates the 
codebase without necessity.
https://lore.kernel.org/qemu-devel/83274ca7-dd37-4856-b198-f334bf630835@rsg.ci.i.u-tokyo.ac.jp/

> 
> And, since calling ram_block_is_memfd_backed() for each entry would
> incur extra overhead (as there can be thousands of entries and fcntl needs
> to check with the kernel), I figured calling it once for the first ram block
> and comparing all the other ram blocks against it made sense.

I don't think the change is relevant to adding support for VFIO, and 
I doubt its necessity either; the UDMABUF_CREATE_LIST ioctl will catch it.

> 
> Also, rethinking this whole situation again, I don't think we should try to create
> a dmabuf for a buffer that might have mixed/different memory regions or
> memfds (as this is most likely an invalid scenario that could lead to undefined
> behavior) so this change is meant to prevent such scenario.

In the email thread I cited I only said the check is unnecessary, but it 
can also be problematic.

For example, if you hotplug some memory, the memory can be backed by 
another memfd, and the kernel may use both the existing memory and the 
hotplugged one to back a virtually contiguous region allocated for a 
virtio-gpu resource. You may look at drm_gem_get_pages() in Linux and 
find out that it allocates on a per-page basis.

Regards,
Akihiko Odaki

RE: [PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Kasireddy, Vivek 1 week, 4 days ago
Hi Akihiko,

> Subject: Re: [PATCH v3 9/9] virtio-gpu-dmabuf: Create dmabuf for blobs
> associated with VFIO devices
> >> On 2025/11/22 15:46, Vivek Kasireddy wrote:
> >>> In addition to memfd, a blob resource can also have its backing
> >>> storage in a VFIO device region. Therefore, we first need to figure
> >>> out if the blob is backed by a VFIO device region or a memfd before
> >>> we can call the right API to get a dmabuf fd created.
> >>>
> >>> So, we first call virtio_gpu_create_udmabuf() which further calls
> >>> ram_block_is_memfd_backed() to check if the blob's backing storage
> >>> is located in a memfd or not. If it is not, we call vfio_create_dmabuf()
> >>> which identifies the right VFIO device and eventually invokes the
> >>> API vfio_device_create_dmabuf_fd() to have a dmabuf fd created.
> >>>
> >>> Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> >>> the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> >>> use the VFIO device fd directly to create the CPU mapping.
> >>>
> >>> While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> >>> from virtio_gpu_create_udmabuf().
> >>>
> >>> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> >>> Cc: Alex Bennée <alex.bennee@linaro.org>
> >>> Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> >>> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> >>> Cc: Alex Williamson <alex@shazbot.org>
> >>> Cc: Cédric Le Goater <clg@redhat.com>
> >>> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> >>> ---
> >>>    hw/display/Kconfig             |   5 ++
> >>>    hw/display/virtio-gpu-dmabuf.c | 122
> >> ++++++++++++++++++++++++++++++---
> >>>    2 files changed, 119 insertions(+), 8 deletions(-)
> >>>
> >>> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> >>> index 1e95ab28ef..0d090f25f5 100644
> >>> --- a/hw/display/Kconfig
> >>> +++ b/hw/display/Kconfig
> >>> @@ -106,6 +106,11 @@ config VIRTIO_VGA
> >>>        depends on VIRTIO_PCI
> >>>        select VGA
> >>>
> >>> +config VIRTIO_GPU_VFIO_BLOB
> >>> +    bool
> >>> +    default y
> >>> +    depends on VFIO
> >>> +
> >>>    config VHOST_USER_GPU
> >>>        bool
> >>>        default y
> >>> diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-
> >> dmabuf.c
> >>> index 258c48d31b..d121a2c9a7 100644
> >>> --- a/hw/display/virtio-gpu-dmabuf.c
> >>> +++ b/hw/display/virtio-gpu-dmabuf.c
> >>> @@ -18,6 +18,7 @@
> >>>    #include "ui/console.h"
> >>>    #include "hw/virtio/virtio-gpu.h"
> >>>    #include "hw/virtio/virtio-gpu-pixman.h"
> >>> +#include "hw/vfio/vfio-device.h"
> >>>    #include "trace.h"
> >>>    #include "system/ramblock.h"
> >>>    #include "system/hostmem.h"
> >>> @@ -40,10 +41,42 @@ static bool
> ram_block_is_memfd_backed(RAMBlock
> >> *rb)
> >>>        return false;
> >>>    }
> >>>
> >>> +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
> >>> +{
> >>> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> >>> +    VFIODevice *vbasedev;
> >>> +    RAMBlock *first_rb;
> >>> +    ram_addr_t offset;
> >>> +
> >>> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> >> &offset);
> >>> +    if (!first_rb) {
> >>> +        qemu_log_mask(LOG_GUEST_ERROR,
> >>> +                      "%s: Could not find ramblock\n", __func__);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    vbasedev = vfio_device_lookup(first_rb->mr);
> >>> +    if (!vbasedev) {
> >>> +        qemu_log_mask(LOG_GUEST_ERROR,
> >>> +                      "%s: No VFIO device found to create dmabuf from\n",
> >>> +                      __func__);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> >>> +                                                  res->iov, res->iov_cnt);
> >>> +    if (res->dmabuf_fd < 0) {
> >>> +        qemu_log_mask(LOG_GUEST_ERROR,
> >>> +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> >>> +                      __func__, strerror(errno));
> >>> +    }
> >>> +#endif
> >>> +}
> >>> +
> >>>    static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource
> >> *res)
> >>>    {
> >>>        struct udmabuf_create_list *list;
> >>> -    RAMBlock *rb;
> >>> +    RAMBlock *rb, *first_rb;
> >>>        ram_addr_t offset;
> >>>        int udmabuf, i;
> >>>
> >>> @@ -52,15 +85,17 @@ static void virtio_gpu_create_udmabuf(struct
> >> virtio_gpu_simple_resource *res)
> >>>            return;
> >>>        }
> >>>
> >>> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> >> &offset);
> >>> +    if (!ram_block_is_memfd_backed(first_rb)) {
> >>> +        return;
> >>> +    }
> >>> +
> >>
> >> We had an extensive discussion but I still don't understand the benefit
> >> of this change while I see it adds complexity by having another call of
> > I'll get rid of the extra call to qemu_ram_block_from_host() in the next
> version.
> 
> It is possible to reduce the number of the execution of
> qemu_ram_block_from_host() calls, but the code complexity remains unless
> you keep the original code.
> 
> >
> >> qemu_ram_block_from_host() and imposing an extra restriction that all
> >> elements need to belong to one RAMBlock.
> > I thought you suggested that we need to ensure all entries (need to be
> > validated and) are associated with the same memory region? As
> > virtio_gpu_create_udmabuf() was not doing that, I thought this
> > change/restriction needs to be added.
> 
> My first comment I raised for "[PATCH v2 09/10] virtio-gpu-dmabuf:
> Introduce qemu_iovec_same_memory_regions()" was that it complicates
> the
> codebase without necessity.
> https://lore.kernel.org/qemu-devel/83274ca7-dd37-4856-b198-
> f334bf630835@rsg.ci.i.u-tokyo.ac.jp/
> 
> >
> > And, since calling ram_block_is_memfd_backed() for each entry would
> > incur extra overhead (as there can be thousands of entries and fcntl needs
> > to check with the kernel), I figured calling it once for the first ram block
> > and comparing all the other ram blocks against it made sense.
> 
> I don't think the change is irrelevant with adding support of VFIO, and
> I doubt its necessity either; the UDMABUF_CREATE_LIST ioctl will catch it.
> 
> >
> > Also, rethinking this whole situation again, I don't think we should try to
> create
> > a dmabuf for a buffer that might have mixed/different memory regions or
> > memfds (as this is most likely an invalid scenario that could lead to
> undefined
> > behavior) so this change is meant to prevent such scenario.
> 
> In the email thread I cited I only said the check is unnecessary, but it
> can be also problematic.
> 
> For example, if you hotplug some memory, the memory can be backed by
> another memfd, and the kernel may use both the existing memory and the
> hotplugged one to back a virtually contiguous region allocated for a
> virtio-gpu resource. You may look at drm_gem_get_pages() in Linux and
> find out that it allocates on a per-page basis.
Ok, this seems like a case where having multiple memfds associated with a
dmabuf makes sense, so I'll go ahead and drop this extra check/restriction
in the next version.

Thanks,
Vivek

> 
> Regards,
> Akihiko Odaki