[PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices

Vivek Kasireddy posted 8 patches 2 weeks, 4 days ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, "Michael S. Tsirkin" <mst@redhat.com>, "Alex Bennée" <alex.bennee@linaro.org>, Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>, Dmitry Osipenko <dmitry.osipenko@collabora.com>, Alex Williamson <alex@shazbot.org>, "Cédric Le Goater" <clg@redhat.com>
There is a newer version of this series
[PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Vivek Kasireddy 2 weeks, 4 days ago
In addition to memfd, a blob resource can also have its backing
storage in a VFIO device region. Since there is no effective way
to determine where the backing storage is located, we first try to
create a dmabuf assuming it is in memfd. If that fails, we try to
create a dmabuf assuming it is in a VFIO device region.

So, we first call virtio_gpu_create_udmabuf() to check if the blob's
backing storage is located in a memfd or not. If it is not, we call
vfio_create_dmabuf() which identifies the right VFIO device and
eventually invokes the vfio_device_create_dmabuf_fd() API to have
a dmabuf fd created.

Note that in virtio_gpu_remap_dmabuf(), we first try to test if
the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
use the VFIO device fd directly to create the CPU mapping.

While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
from virtio_gpu_create_udmabuf() and also replace warn_report()
with qemu_log_mask().

Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Cc: Alex Williamson <alex@shazbot.org>
Cc: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
 hw/display/Kconfig             |   5 ++
 hw/display/virtio-gpu-dmabuf.c | 117 ++++++++++++++++++++++++++++++---
 2 files changed, 113 insertions(+), 9 deletions(-)

diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index 1e95ab28ef..0d090f25f5 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -106,6 +106,11 @@ config VIRTIO_VGA
     depends on VIRTIO_PCI
     select VGA
 
+config VIRTIO_GPU_VFIO_BLOB
+    bool
+    default y
+    depends on VFIO
+
 config VHOST_USER_GPU
     bool
     default y
diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-dmabuf.c
index c34d4c85bc..2dfe70d7eb 100644
--- a/hw/display/virtio-gpu-dmabuf.c
+++ b/hw/display/virtio-gpu-dmabuf.c
@@ -18,6 +18,7 @@
 #include "ui/console.h"
 #include "hw/virtio/virtio-gpu.h"
 #include "hw/virtio/virtio-gpu-pixman.h"
+#include "hw/vfio/vfio-device.h"
 #include "trace.h"
 #include "system/ramblock.h"
 #include "system/hostmem.h"
@@ -27,6 +28,38 @@
 #include "standard-headers/linux/udmabuf.h"
 #include "standard-headers/drm/drm_fourcc.h"
 
+static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
+{
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+    VFIODevice *vbasedev;
+    RAMBlock *first_rb;
+    ram_addr_t offset;
+
+    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
+    if (!first_rb) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: Could not find ramblock\n", __func__);
+        return;
+    }
+
+    vbasedev = vfio_device_lookup(first_rb->mr);
+    if (!vbasedev) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: No VFIO device found to create dmabuf from\n",
+                      __func__);
+        return;
+    }
+
+    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
+                                                  res->iov, res->iov_cnt);
+    if (res->dmabuf_fd < 0) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
+                      __func__, strerror(errno));
+    }
+#endif
+}
+
 static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
 {
     struct udmabuf_create_list *list;
@@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
                      sizeof(struct udmabuf_create_item) * res->iov_cnt);
 
     for (i = 0; i < res->iov_cnt; i++) {
-        rcu_read_lock();
         rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
-        rcu_read_unlock();
-
         if (!rb || rb->fd < 0) {
             g_free(list);
             return;
@@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
 
     res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
     if (res->dmabuf_fd < 0) {
-        warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
-                    strerror(errno));
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "%s: UDMABUF_CREATE_LIST: %s\n",
+                      __func__, strerror(errno));
     }
     g_free(list);
 }
 
+static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)
+{
+    struct vfio_region_info *info = NULL;
+    VFIODevice *vbasedev = NULL;
+    ram_addr_t offset, len = 0;
+    RAMBlock *first_rb, *rb;
+    void *map, *submap;
+    int i, ret = -1;
+
+    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
+    if (!first_rb) {
+        return MAP_FAILED;
+    }
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+    vbasedev = vfio_device_lookup(first_rb->mr);
+#endif
+    if (!vbasedev) {
+        return MAP_FAILED;
+    }
+
+    /*
+     * We first reserve a contiguous chunk of address space for the entire
+     * dmabuf, then replace it with smaller mappings that correspond to the
+     * individual segments of the dmabuf.
+     */
+    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED, vbasedev->fd, 0);
+    if (map == MAP_FAILED) {
+        return map;
+    }
+
+    for (i = 0; i < res->iov_cnt; i++) {
+        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
+        if (rb != first_rb) {
+            goto err;
+        }
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+        ret = vfio_get_region_index_from_mr(rb->mr);
+        if (ret < 0) {
+            goto err;
+        }
+
+        ret = vfio_device_get_region_info(vbasedev, ret, &info);
+#endif
+        if (ret < 0 || !info) {
+            goto err;
+        }
+
+        submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
+                      MAP_SHARED | MAP_FIXED, vbasedev->fd,
+                      info->offset + offset);
+        if (submap == MAP_FAILED) {
+            goto err;
+        }
+
+        len += res->iov[i].iov_len;
+    }
+    return map;
+err:
+    munmap(map, res->blob_size);
+    return MAP_FAILED;
+}
+
 static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource *res)
 {
     res->remapped = mmap(NULL, res->blob_size, PROT_READ,
                          MAP_SHARED, res->dmabuf_fd, 0);
     if (res->remapped == MAP_FAILED) {
+        res->remapped = vfio_dmabuf_mmap(res);
+        if (res->remapped != MAP_FAILED) {
+            return;
+        }
         warn_report("%s: dmabuf mmap failed: %s", __func__,
                     strerror(errno));
         res->remapped = NULL;
@@ -139,8 +236,12 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
     } else {
         virtio_gpu_create_udmabuf(res);
         if (res->dmabuf_fd < 0) {
-            return;
+            vfio_create_dmabuf(res);
+            if (res->dmabuf_fd < 0) {
+                return;
+            }
         }
+
         virtio_gpu_remap_dmabuf(res);
         if (!res->remapped) {
             return;
@@ -153,9 +254,7 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
 
 void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
 {
-    if (res->remapped) {
-        virtio_gpu_destroy_dmabuf(res);
-    }
+    virtio_gpu_destroy_dmabuf(res);
 }
 
 static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)
-- 
2.50.1


Re: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Cédric Le Goater 1 week, 6 days ago
On 1/23/26 07:17, Vivek Kasireddy wrote:
> In addition to memfd, a blob resource can also have its backing
> storage in a VFIO device region. Since, there is no effective way
> to determine where the backing storage is located, we first try to
> create a dmabuf assuming it is in memfd. If that fails, we try to
> create a dmabuf assuming it is in VFIO device region.
> 
> So, we first call virtio_gpu_create_udmabuf() to check if the blob's
> backing storage is located in a memfd or not. If it is not, we call
> vfio_create_dmabuf() which identifies the right VFIO device and
> eventually invokes the vfio_device_create_dmabuf_fd() API to have
> a dmabuf fd created.
> 
> Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> use the VFIO device fd directly to create the CPU mapping.
> 
> While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> from virtio_gpu_create_udmabuf() and also replace warn_report()
> with qemu_log_mask().
> 
> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> Cc: Alex Bennée <alex.bennee@linaro.org>
> Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> Cc: Alex Williamson <alex@shazbot.org>
> Cc: Cédric Le Goater <clg@redhat.com>
> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> ---
>   hw/display/Kconfig             |   5 ++
>   hw/display/virtio-gpu-dmabuf.c | 117 ++++++++++++++++++++++++++++++---
>   2 files changed, 113 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> index 1e95ab28ef..0d090f25f5 100644
> --- a/hw/display/Kconfig
> +++ b/hw/display/Kconfig
> @@ -106,6 +106,11 @@ config VIRTIO_VGA
>       depends on VIRTIO_PCI
>       select VGA
>   
> +config VIRTIO_GPU_VFIO_BLOB
> +    bool
> +    default y
> +    depends on VFIO
> +
>   config VHOST_USER_GPU
>       bool
>       default y
> diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-dmabuf.c
> index c34d4c85bc..2dfe70d7eb 100644
> --- a/hw/display/virtio-gpu-dmabuf.c
> +++ b/hw/display/virtio-gpu-dmabuf.c
> @@ -18,6 +18,7 @@
>   #include "ui/console.h"
>   #include "hw/virtio/virtio-gpu.h"
>   #include "hw/virtio/virtio-gpu-pixman.h"
> +#include "hw/vfio/vfio-device.h"
>   #include "trace.h"
>   #include "system/ramblock.h"
>   #include "system/hostmem.h"
> @@ -27,6 +28,38 @@
>   #include "standard-headers/linux/udmabuf.h"
>   #include "standard-headers/drm/drm_fourcc.h"
>   
> +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)

Could you please not use the 'vfio_' prefix? It's confusing as these
routines are part of the virtio component.


> +{
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +    VFIODevice *vbasedev;
> +    RAMBlock *first_rb;
> +    ram_addr_t offset;
> +
> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> +    if (!first_rb) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: Could not find ramblock\n", __func__);

Error handling is quite poor. Can't we have an 'Error *' parameter ?

> +        return;
> +    }
> +
> +    vbasedev = vfio_device_lookup(first_rb->mr);
> +    if (!vbasedev) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: No VFIO device found to create dmabuf from\n",
> +                      __func__);
> +        return;
> +    }

vfio_dmabuf_mmap() does the same sequence of code to query the VFIODevice.
Maybe a common helper would help.

> +
> +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> +                                                  res->iov, res->iov_cnt);
> +    if (res->dmabuf_fd < 0) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> +                      __func__, strerror(errno));
> +    }
> +#endif
> +}
> +
>   static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>   {
>       struct udmabuf_create_list *list;
> @@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>                        sizeof(struct udmabuf_create_item) * res->iov_cnt);
>   
>       for (i = 0; i < res->iov_cnt; i++) {
> -        rcu_read_lock();
>           rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> -        rcu_read_unlock();
> -

The rcu un/lock removal needs some explanation. It deserves a separate patch,
I would say.


>           if (!rb || rb->fd < 0) {
>               g_free(list);
>               return;
> @@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>   
>       res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
>       if (res->dmabuf_fd < 0) {
> -        warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
> -                    strerror(errno));
> +        qemu_log_mask(LOG_GUEST_ERROR,

Is it a guest error ?

> +                      "%s: UDMABUF_CREATE_LIST: %s\n",
> +                      __func__, strerror(errno));
>       }
>       g_free(list);
>   }
>   
> +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)

please use a "virtio" prefix.

> +{
> +    struct vfio_region_info *info = NULL;
> +    VFIODevice *vbasedev = NULL;
> +    ram_addr_t offset, len = 0;
> +    RAMBlock *first_rb, *rb;
> +    void *map, *submap;
> +    int i, ret = -1;
> +
> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> +    if (!first_rb) {
> +        return MAP_FAILED;
> +    }
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +    vbasedev = vfio_device_lookup(first_rb->mr);
> +#endif
> +    if (!vbasedev) {
> +        return MAP_FAILED;
> +    }
> +
> +    /*
> +     * We first reserve a contiguous chunk of address space for the entire
> +     * dmabuf, then replace it with smaller mappings that correspond to the
> +     * individual segments of the dmabuf.
> +     */
> +    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED, vbasedev->fd, 0);
> +    if (map == MAP_FAILED) {
> +        return map;
> +    }
> +
> +    for (i = 0; i < res->iov_cnt; i++) {
> +        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> +        if (rb != first_rb) {
> +            goto err;
> +        }
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +        ret = vfio_get_region_index_from_mr(rb->mr);
> +        if (ret < 0) {
> +            goto err;
> +        }
> +
> +        ret = vfio_device_get_region_info(vbasedev, ret, &info);
> +#endif
> +        if (ret < 0 || !info) {
> +            goto err;
> +        }
> +
> +        submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
> +                      MAP_SHARED | MAP_FIXED, vbasedev->fd,
> +                      info->offset + offset);
> +        if (submap == MAP_FAILED) {
> +            goto err;
> +        }
> +
> +        len += res->iov[i].iov_len;
> +    }
> +    return map;
> +err:
> +    munmap(map, res->blob_size);
> +    return MAP_FAILED;
> +}
> +
>   static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource *res)
>   {
>       res->remapped = mmap(NULL, res->blob_size, PROT_READ,
>                            MAP_SHARED, res->dmabuf_fd, 0);
>       if (res->remapped == MAP_FAILED) {
> +        res->remapped = vfio_dmabuf_mmap(res);
> +        if (res->remapped != MAP_FAILED) {
> +            return;
> +        }
>           warn_report("%s: dmabuf mmap failed: %s", __func__,
>                       strerror(errno));
>           res->remapped = NULL;
> @@ -139,8 +236,12 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
>       } else {
>           virtio_gpu_create_udmabuf(res);
>           if (res->dmabuf_fd < 0) {

Why test dmabuf_fd? Can virtio_gpu_init_dmabuf() be called multiple times?


Thanks,

C.


> -            return;
> +            vfio_create_dmabuf(res);
> +            if (res->dmabuf_fd < 0) {
> +                return;
> +            }
>           }
> +
>           virtio_gpu_remap_dmabuf(res);
>           if (!res->remapped) {
>               return;
> @@ -153,9 +254,7 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
>   
>   void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
>   {
> -    if (res->remapped) {
> -        virtio_gpu_destroy_dmabuf(res);
> -    }
> +    virtio_gpu_destroy_dmabuf(res);
>   }
>   
>   static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)


RE: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Kasireddy, Vivek 1 week, 5 days ago
Hi Cedric,

> Subject: Re: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs
> associated with VFIO devices
> 
> On 1/23/26 07:17, Vivek Kasireddy wrote:
> > In addition to memfd, a blob resource can also have its backing
> > storage in a VFIO device region. Since, there is no effective way
> > to determine where the backing storage is located, we first try to
> > create a dmabuf assuming it is in memfd. If that fails, we try to
> > create a dmabuf assuming it is in VFIO device region.
> >
> > So, we first call virtio_gpu_create_udmabuf() to check if the blob's
> > backing storage is located in a memfd or not. If it is not, we call
> > vfio_create_dmabuf() which identifies the right VFIO device and
> > eventually invokes the vfio_device_create_dmabuf_fd() API to have
> > a dmabuf fd created.
> >
> > Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> > the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> > use the VFIO device fd directly to create the CPU mapping.
> >
> > While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> > from virtio_gpu_create_udmabuf() and also replace warn_report()
> > with qemu_log_mask().
> >
> > Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> > Cc: Alex Bennée <alex.bennee@linaro.org>
> > Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> > Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> > Cc: Alex Williamson <alex@shazbot.org>
> > Cc: Cédric Le Goater <clg@redhat.com>
> > Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> > ---
> >   hw/display/Kconfig             |   5 ++
> >   hw/display/virtio-gpu-dmabuf.c | 117
> ++++++++++++++++++++++++++++++---
> >   2 files changed, 113 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> > index 1e95ab28ef..0d090f25f5 100644
> > --- a/hw/display/Kconfig
> > +++ b/hw/display/Kconfig
> > @@ -106,6 +106,11 @@ config VIRTIO_VGA
> >       depends on VIRTIO_PCI
> >       select VGA
> >
> > +config VIRTIO_GPU_VFIO_BLOB
> > +    bool
> > +    default y
> > +    depends on VFIO
> > +
> >   config VHOST_USER_GPU
> >       bool
> >       default y
> > diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-
> dmabuf.c
> > index c34d4c85bc..2dfe70d7eb 100644
> > --- a/hw/display/virtio-gpu-dmabuf.c
> > +++ b/hw/display/virtio-gpu-dmabuf.c
> > @@ -18,6 +18,7 @@
> >   #include "ui/console.h"
> >   #include "hw/virtio/virtio-gpu.h"
> >   #include "hw/virtio/virtio-gpu-pixman.h"
> > +#include "hw/vfio/vfio-device.h"
> >   #include "trace.h"
> >   #include "system/ramblock.h"
> >   #include "system/hostmem.h"
> > @@ -27,6 +28,38 @@
> >   #include "standard-headers/linux/udmabuf.h"
> >   #include "standard-headers/drm/drm_fourcc.h"
> >
> > +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource
> *res)
> 
> Could you please not use the 'vfio_' prefix ? Its confusing as there
> routines are part of the virtio component.
Ok, I'll use a different prefix. 

> 
> 
> > +{
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +    VFIODevice *vbasedev;
> > +    RAMBlock *first_rb;
> > +    ram_addr_t offset;
> > +
> > +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > +    if (!first_rb) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: Could not find ramblock\n", __func__);
> 
> Error handling is quite poor. Can't we have an 'Error *' parameter ?
Ok, I'll improve the error handling by introducing Error * here.

> 
> > +        return;
> > +    }
> > +
> > +    vbasedev = vfio_device_lookup(first_rb->mr);
> > +    if (!vbasedev) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: No VFIO device found to create dmabuf from\n",
> > +                      __func__);
> > +        return;
> > +    }
> 
> vfio_dmabuf_mmap() does the same sequence of code to query the
> VFIODevice.
> May be a common helper would help.
Sure, I'll try to figure out a common helper.

> 
> > +
> > +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> > +                                                  res->iov, res->iov_cnt);
> > +    if (res->dmabuf_fd < 0) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> > +                      __func__, strerror(errno));
> > +    }
> > +#endif
> > +}
> > +
> >   static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> >   {
> >       struct udmabuf_create_list *list;
> > @@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> >                        sizeof(struct udmabuf_create_item) * res->iov_cnt);
> >
> >       for (i = 0; i < res->iov_cnt; i++) {
> > -        rcu_read_lock();
> >           rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
> &offset);
> > -        rcu_read_unlock();
> > -
> 
> The rcu un/lock removal needs some explanations. It deserves a separate
> patch
> I would say.
Ok, I'll split these changes out into separate patches.

> 
> 
> >           if (!rb || rb->fd < 0) {
> >               g_free(list);
> >               return;
> > @@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> >
> >       res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
> >       if (res->dmabuf_fd < 0) {
> > -        warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
> > -                    strerror(errno));
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> 
> Is it a guest error ?
IIUC, it can be considered a guest error as it failed to create a dmabuf
on the Host using the udmabuf driver and Guest provided addresses. 

> 
> > +                      "%s: UDMABUF_CREATE_LIST: %s\n",
> > +                      __func__, strerror(errno));
> >       }
> >       g_free(list);
> >   }
> >
> > +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource
> *res)
> 
> please use a "virtio" prefix.
As suggested by Akihiko, I am considering moving this routine to VFIO in
order to avoid exposing VFIODevice or VFIODevice->fd outside of VFIO.

> 
> > +{
> > +    struct vfio_region_info *info = NULL;
> > +    VFIODevice *vbasedev = NULL;
> > +    ram_addr_t offset, len = 0;
> > +    RAMBlock *first_rb, *rb;
> > +    void *map, *submap;
> > +    int i, ret = -1;
> > +
> > +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > +    if (!first_rb) {
> > +        return MAP_FAILED;
> > +    }
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +    vbasedev = vfio_device_lookup(first_rb->mr);
> > +#endif
> > +    if (!vbasedev) {
> > +        return MAP_FAILED;
> > +    }
> > +
> > +    /*
> > +     * We first reserve a contiguous chunk of address space for the entire
> > +     * dmabuf, then replace it with smaller mappings that correspond to
> the
> > +     * individual segments of the dmabuf.
> > +     */
> > +    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED,
> vbasedev->fd, 0);
> > +    if (map == MAP_FAILED) {
> > +        return map;
> > +    }
> > +
> > +    for (i = 0; i < res->iov_cnt; i++) {
> > +        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
> &offset);
> > +        if (rb != first_rb) {
> > +            goto err;
> > +        }
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +        ret = vfio_get_region_index_from_mr(rb->mr);
> > +        if (ret < 0) {
> > +            goto err;
> > +        }
> > +
> > +        ret = vfio_device_get_region_info(vbasedev, ret, &info);
> > +#endif
> > +        if (ret < 0 || !info) {
> > +            goto err;
> > +        }
> > +
> > +        submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
> > +                      MAP_SHARED | MAP_FIXED, vbasedev->fd,
> > +                      info->offset + offset);
> > +        if (submap == MAP_FAILED) {
> > +            goto err;
> > +        }
> > +
> > +        len += res->iov[i].iov_len;
> > +    }
> > +    return map;
> > +err:
> > +    munmap(map, res->blob_size);
> > +    return MAP_FAILED;
> > +}
> > +
> >   static void virtio_gpu_remap_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >   {
> >       res->remapped = mmap(NULL, res->blob_size, PROT_READ,
> >                            MAP_SHARED, res->dmabuf_fd, 0);
> >       if (res->remapped == MAP_FAILED) {
> > +        res->remapped = vfio_dmabuf_mmap(res);
> > +        if (res->remapped != MAP_FAILED) {
> > +            return;
> > +        }
> >           warn_report("%s: dmabuf mmap failed: %s", __func__,
> >                       strerror(errno));
> >           res->remapped = NULL;
> > @@ -139,8 +236,12 @@ void virtio_gpu_init_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >       } else {
> >           virtio_gpu_create_udmabuf(res);
> >           if (res->dmabuf_fd < 0) {
> 
> Why test dmabuf_fd ? Can virtio_gpu_init_dmabuf() be called mulmtiple
> times ?
Yes, virtio_gpu_init_dmabuf() can be called multiple times as the Guest VM
creates/uses new resources/Guest blobs, which can happen frequently.
Are you suggesting that it is better to make virtio_gpu_create_udmabuf()
return res->dmabuf_fd explicitly as opposed to the current indirect approach?

Thanks,
Vivek

> 
> 
> Thanks,
> 
> C.
> 
> 
> > -            return;
> > +            vfio_create_dmabuf(res);
> > +            if (res->dmabuf_fd < 0) {
> > +                return;
> > +            }
> >           }
> > +
> >           virtio_gpu_remap_dmabuf(res);
> >           if (!res->remapped) {
> >               return;
> > @@ -153,9 +254,7 @@ void virtio_gpu_init_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >
> >   void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
> >   {
> > -    if (res->remapped) {
> > -        virtio_gpu_destroy_dmabuf(res);
> > -    }
> > +    virtio_gpu_destroy_dmabuf(res);
> >   }
> >
> >   static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf
> *dmabuf)
Re: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Cédric Le Goater 1 week, 5 days ago
Hi,


>>>    {
>>>        res->remapped = mmap(NULL, res->blob_size, PROT_READ,
>>>                             MAP_SHARED, res->dmabuf_fd, 0);
>>>        if (res->remapped == MAP_FAILED) {
>>> +        res->remapped = vfio_dmabuf_mmap(res);
>>> +        if (res->remapped != MAP_FAILED) {
>>> +            return;
>>> +        }
>>>            warn_report("%s: dmabuf mmap failed: %s", __func__,
>>>                        strerror(errno));
>>>            res->remapped = NULL;
>>> @@ -139,8 +236,12 @@ void virtio_gpu_init_dmabuf(struct
>> virtio_gpu_simple_resource *res)
>>>        } else {
>>>            virtio_gpu_create_udmabuf(res);
>>>            if (res->dmabuf_fd < 0) {
>>
>> Why test dmabuf_fd ? Can virtio_gpu_init_dmabuf() be called mulmtiple
>> times ?
> Yes, virtio_gpu_init_dmabuf() can be called multiple times as the Guest VM
> creates/uses new resources/Guest blobs, which can happen frequently.
> Are you suggesting that it is better to make virtio_gpu_create_udmabuf()
> return res->dmabuf_fd explicitly as opposed to the current indirect approach?

Yes. Good idea. It would improve reading.

Thanks,

C.
Re: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Akihiko Odaki 2 weeks, 4 days ago
On 2026/01/23 15:17, Vivek Kasireddy wrote:
> In addition to memfd, a blob resource can also have its backing
> storage in a VFIO device region. Since, there is no effective way
> to determine where the backing storage is located, we first try to
> create a dmabuf assuming it is in memfd. If that fails, we try to
> create a dmabuf assuming it is in VFIO device region.
> 
> So, we first call virtio_gpu_create_udmabuf() to check if the blob's
> backing storage is located in a memfd or not. If it is not, we call
> vfio_create_dmabuf() which identifies the right VFIO device and
> eventually invokes the vfio_device_create_dmabuf_fd() API to have
> a dmabuf fd created.
> 
> Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> use the VFIO device fd directly to create the CPU mapping.
> 
> While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> from virtio_gpu_create_udmabuf() and also replace warn_report()
> with qemu_log_mask().
> 
> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> Cc: Alex Bennée <alex.bennee@linaro.org>
> Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> Cc: Alex Williamson <alex@shazbot.org>
> Cc: Cédric Le Goater <clg@redhat.com>
> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> ---
>   hw/display/Kconfig             |   5 ++
>   hw/display/virtio-gpu-dmabuf.c | 117 ++++++++++++++++++++++++++++++---
>   2 files changed, 113 insertions(+), 9 deletions(-)
> 
> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> index 1e95ab28ef..0d090f25f5 100644
> --- a/hw/display/Kconfig
> +++ b/hw/display/Kconfig
> @@ -106,6 +106,11 @@ config VIRTIO_VGA
>       depends on VIRTIO_PCI
>       select VGA
>   
> +config VIRTIO_GPU_VFIO_BLOB
> +    bool
> +    default y
> +    depends on VFIO
> +
>   config VHOST_USER_GPU
>       bool
>       default y
> diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-dmabuf.c
> index c34d4c85bc..2dfe70d7eb 100644
> --- a/hw/display/virtio-gpu-dmabuf.c
> +++ b/hw/display/virtio-gpu-dmabuf.c
> @@ -18,6 +18,7 @@
>   #include "ui/console.h"
>   #include "hw/virtio/virtio-gpu.h"
>   #include "hw/virtio/virtio-gpu-pixman.h"
> +#include "hw/vfio/vfio-device.h"
>   #include "trace.h"
>   #include "system/ramblock.h"
>   #include "system/hostmem.h"
> @@ -27,6 +28,38 @@
>   #include "standard-headers/linux/udmabuf.h"
>   #include "standard-headers/drm/drm_fourcc.h"
>   
> +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
> +{
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +    VFIODevice *vbasedev;
> +    RAMBlock *first_rb;
> +    ram_addr_t offset;
> +
> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> +    if (!first_rb) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: Could not find ramblock\n", __func__);
> +        return;
> +    }
> +
> +    vbasedev = vfio_device_lookup(first_rb->mr);
> +    if (!vbasedev) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: No VFIO device found to create dmabuf from\n",
> +                      __func__);
> +        return;
> +    }
> +
> +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> +                                                  res->iov, res->iov_cnt);
> +    if (res->dmabuf_fd < 0) {
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> +                      __func__, strerror(errno));
> +    }

This API design of vfio_device_create_dmabuf_fd() has some room of 
improvements.

First, this function requires to pass VFIODevice * which is derived from 
res->iov[0]. Since it is getting res->iov[0] anyway, such a parameter is 
redundant. vfio_device_create_dmabuf_fd() can call vfio_device_lookup() 
itself instead.

The second problem is the error handling. The error may not be caused by 
VFIO_DEVICE_FEATURE_DMA_BUF but may be caused by the guest passing 
regions that do not belong to one VFIO device. The two cases should be 
distinguished for a correct message.

> +#endif
> +}
> +
>   static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>   {
>       struct udmabuf_create_list *list;
> @@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>                        sizeof(struct udmabuf_create_item) * res->iov_cnt);
>   
>       for (i = 0; i < res->iov_cnt; i++) {
> -        rcu_read_lock();
>           rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> -        rcu_read_unlock();
> -
>           if (!rb || rb->fd < 0) {
>               g_free(list);
>               return;
> @@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>   
>       res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
>       if (res->dmabuf_fd < 0) {
> -        warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
> -                    strerror(errno));
> +        qemu_log_mask(LOG_GUEST_ERROR,
> +                      "%s: UDMABUF_CREATE_LIST: %s\n",
> +                      __func__, strerror(errno));
>       }
>       g_free(list);
>   }

These changes to virtio_gpu_create_udmabuf() are independent, so they 
should go into separate patches with an explanation.

>   
> +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)
> +{
> +    struct vfio_region_info *info = NULL;
> +    VFIODevice *vbasedev = NULL;
> +    ram_addr_t offset, len = 0;
> +    RAMBlock *first_rb, *rb;
> +    void *map, *submap;
> +    int i, ret = -1;
> +
> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> +    if (!first_rb) {
> +        return MAP_FAILED;
> +    }
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +    vbasedev = vfio_device_lookup(first_rb->mr);
> +#endif
> +    if (!vbasedev) {
> +        return MAP_FAILED;
> +    }
> +
> +    /*
> +     * We first reserve a contiguous chunk of address space for the entire
> +     * dmabuf, then replace it with smaller mappings that correspond to the
> +     * individual segments of the dmabuf.
> +     */
> +    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED, vbasedev->fd, 0);

It should have PROT_NONE instead of PROT_READ to prevent accidental 
accesses to the placeholder mapping.
Likewise, -1 should be passed instead of vbasedev->fd so that the 
placeholder mapping won't result in a mapping to the real device.

> +    if (map == MAP_FAILED) {
> +        return map;
> +    }
> +
> +    for (i = 0; i < res->iov_cnt; i++) {
> +        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> +        if (rb != first_rb) {
> +            goto err;
> +        }
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> +        ret = vfio_get_region_index_from_mr(rb->mr);
> +        if (ret < 0) {
> +            goto err;
> +        }
> +
> +        ret = vfio_device_get_region_info(vbasedev, ret, &info);

This function can take rb->mr and call vfio_device_lookup() and 
vfio_get_region_index_from_mr() internally to simplify the API as 
similar can be done for vfio_device_create_dmabuf_fd().

> +#endif
> +        if (ret < 0 || !info) {
> +            goto err;
> +        }
> +
> +        submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
> +                      MAP_SHARED | MAP_FIXED, vbasedev->fd,
> +                      info->offset + offset);
> +        if (submap == MAP_FAILED) {
> +            goto err;
> +        }
> +
> +        len += res->iov[i].iov_len;
> +    }
> +    return map;
> +err:
> +    munmap(map, res->blob_size);
> +    return MAP_FAILED;
> +}
> +
>   static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource *res)
>   {
>       res->remapped = mmap(NULL, res->blob_size, PROT_READ,
>                            MAP_SHARED, res->dmabuf_fd, 0);
>       if (res->remapped == MAP_FAILED) {
> +        res->remapped = vfio_dmabuf_mmap(res);
> +        if (res->remapped != MAP_FAILED) {
> +            return;
> +        }
>           warn_report("%s: dmabuf mmap failed: %s", __func__,
>                       strerror(errno));
>           res->remapped = NULL;
> @@ -139,8 +236,12 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
>       } else {
>           virtio_gpu_create_udmabuf(res);
>           if (res->dmabuf_fd < 0) {
> -            return;
> +            vfio_create_dmabuf(res);
> +            if (res->dmabuf_fd < 0) {
> +                return;
> +            }
>           }
> +
>           virtio_gpu_remap_dmabuf(res);
>           if (!res->remapped) {
>               return;
> @@ -153,9 +254,7 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
>   
>   void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
>   {
> -    if (res->remapped) {
> -        virtio_gpu_destroy_dmabuf(res);
> -    }
> +    virtio_gpu_destroy_dmabuf(res);
>   }
>   
>   static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)


RE: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Kasireddy, Vivek 2 weeks ago
Hi Akihiko,

> Subject: Re: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs
> associated with VFIO devices
> 
> On 2026/01/23 15:17, Vivek Kasireddy wrote:
> > In addition to memfd, a blob resource can also have its backing
> > storage in a VFIO device region. Since, there is no effective way
> > to determine where the backing storage is located, we first try to
> > create a dmabuf assuming it is in memfd. If that fails, we try to
> > create a dmabuf assuming it is in VFIO device region.
> >
> > So, we first call virtio_gpu_create_udmabuf() to check if the blob's
> > backing storage is located in a memfd or not. If it is not, we call
> > vfio_create_dmabuf() which identifies the right VFIO device and
> > eventually invokes the vfio_device_create_dmabuf_fd() API to have
> > a dmabuf fd created.
> >
> > Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> > the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> > use the VFIO device fd directly to create the CPU mapping.
> >
> > While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> > from virtio_gpu_create_udmabuf() and also replace warn_report()
> > with qemu_log_mask().
> >
> > Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> > Cc: Alex Bennée <alex.bennee@linaro.org>
> > Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> > Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> > Cc: Alex Williamson <alex@shazbot.org>
> > Cc: Cédric Le Goater <clg@redhat.com>
> > Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> > ---
> >   hw/display/Kconfig             |   5 ++
> >   hw/display/virtio-gpu-dmabuf.c | 117
> ++++++++++++++++++++++++++++++---
> >   2 files changed, 113 insertions(+), 9 deletions(-)
> >
> > diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> > index 1e95ab28ef..0d090f25f5 100644
> > --- a/hw/display/Kconfig
> > +++ b/hw/display/Kconfig
> > @@ -106,6 +106,11 @@ config VIRTIO_VGA
> >       depends on VIRTIO_PCI
> >       select VGA
> >
> > +config VIRTIO_GPU_VFIO_BLOB
> > +    bool
> > +    default y
> > +    depends on VFIO
> > +
> >   config VHOST_USER_GPU
> >       bool
> >       default y
> > diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-
> dmabuf.c
> > index c34d4c85bc..2dfe70d7eb 100644
> > --- a/hw/display/virtio-gpu-dmabuf.c
> > +++ b/hw/display/virtio-gpu-dmabuf.c
> > @@ -18,6 +18,7 @@
> >   #include "ui/console.h"
> >   #include "hw/virtio/virtio-gpu.h"
> >   #include "hw/virtio/virtio-gpu-pixman.h"
> > +#include "hw/vfio/vfio-device.h"
> >   #include "trace.h"
> >   #include "system/ramblock.h"
> >   #include "system/hostmem.h"
> > @@ -27,6 +28,38 @@
> >   #include "standard-headers/linux/udmabuf.h"
> >   #include "standard-headers/drm/drm_fourcc.h"
> >
> > +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource
> *res)
> > +{
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +    VFIODevice *vbasedev;
> > +    RAMBlock *first_rb;
> > +    ram_addr_t offset;
> > +
> > +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > +    if (!first_rb) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: Could not find ramblock\n", __func__);
> > +        return;
> > +    }
> > +
> > +    vbasedev = vfio_device_lookup(first_rb->mr);
> > +    if (!vbasedev) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: No VFIO device found to create dmabuf from\n",
> > +                      __func__);
> > +        return;
> > +    }
> > +
> > +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> > +                                                  res->iov, res->iov_cnt);
> > +    if (res->dmabuf_fd < 0) {
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> > +                      __func__, strerror(errno));
> > +    }
> 
> This API design of vfio_device_create_dmabuf_fd() has some room of
> improvements.
> 
> First, this function requires to pass VFIODevice * which is derived from
> res->iov[0]. Since it is getting res->iov[0] anyway, such a parameter is
> redundant. vfio_device_create_dmabuf_fd() can call
> vfio_device_lookup()
> itself instead.
Yeah, I agree. I'll move the call to lookup into vfio_device_create_dmabuf_fd().

> 
> The second problem is the error handling. The error may not be caused by
> VFIO_DEVICE_FEATURE_DMA_BUF but may be caused by the guest
> passing
> regions that do not belong to one VFIO device. The two cases should be
> distinguished for a correct message.
Ok, I'll add separate error messages. 

> 
> > +#endif
> > +}
> > +
> >   static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> >   {
> >       struct udmabuf_create_list *list;
> > @@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> >                        sizeof(struct udmabuf_create_item) * res->iov_cnt);
> >
> >       for (i = 0; i < res->iov_cnt; i++) {
> > -        rcu_read_lock();
> >           rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
> &offset);
> > -        rcu_read_unlock();
> > -
> >           if (!rb || rb->fd < 0) {
> >               g_free(list);
> >               return;
> > @@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct
> virtio_gpu_simple_resource *res)
> >
> >       res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
> >       if (res->dmabuf_fd < 0) {
> > -        warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
> > -                    strerror(errno));
> > +        qemu_log_mask(LOG_GUEST_ERROR,
> > +                      "%s: UDMABUF_CREATE_LIST: %s\n",
> > +                      __func__, strerror(errno));
> >       }
> >       g_free(list);
> >   }
> 
> These changes of virtio_gpu_create_udmabuf() are independent so
> should
> go into separate patches with explanation.
I called these out in the commit message and included them as they were
trivial but I guess I could move them into separate patches.

> 
> >
> > +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource
> *res)
> > +{
> > +    struct vfio_region_info *info = NULL;
> > +    VFIODevice *vbasedev = NULL;
> > +    ram_addr_t offset, len = 0;
> > +    RAMBlock *first_rb, *rb;
> > +    void *map, *submap;
> > +    int i, ret = -1;
> > +
> > +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> &offset);
> > +    if (!first_rb) {
> > +        return MAP_FAILED;
> > +    }
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +    vbasedev = vfio_device_lookup(first_rb->mr);
> > +#endif
> > +    if (!vbasedev) {
> > +        return MAP_FAILED;
> > +    }
> > +
> > +    /*
> > +     * We first reserve a contiguous chunk of address space for the entire
> > +     * dmabuf, then replace it with smaller mappings that correspond to
> the
> > +     * individual segments of the dmabuf.
> > +     */
> > +    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED,
> vbasedev->fd, 0);
> 
> It should have PROT_NONE instead of PROT_READ to prevent accidental
> accesses to the placeholder mapping.
> Likewise, -1 should be passed instead of vbasedev->fd so that the
> placeholder mapping won't result in a mapping to real device.
I am OK with both suggestions. However, I don't see any problem if the
placeholder mapping is associated with the real device. In other words,
what specific problem do you see if we keep vbasedev->fd instead of -1?

> 
> > +    if (map == MAP_FAILED) {
> > +        return map;
> > +    }
> > +
> > +    for (i = 0; i < res->iov_cnt; i++) {
> > +        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
> &offset);
> > +        if (rb != first_rb) {
> > +            goto err;
> > +        }
> > +#if defined(VIRTIO_GPU_VFIO_BLOB)
> > +        ret = vfio_get_region_index_from_mr(rb->mr);
> > +        if (ret < 0) {
> > +            goto err;
> > +        }
> > +
> > +        ret = vfio_device_get_region_info(vbasedev, ret, &info);
> 
> This function can take rb->mr and call vfio_device_lookup() and
> vfio_get_region_index_from_mr() internally to simplify the API as
> similar can be done for vfio_device_create_dmabuf_fd().
Right, but vfio_device_get_region_info() is an already existing API
that is called from various places that I am trying to leverage. I could
create another API to wrap the calls to vfio_get_region_index_from_mr()
and vfio_device_get_region_info() but not sure if that makes sense given
that it would only have one caller. 

Thanks,
Vivek

> 
> > +#endif
> > +        if (ret < 0 || !info) {
> > +            goto err;
> > +        }
> > +
> > +        submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
> > +                      MAP_SHARED | MAP_FIXED, vbasedev->fd,
> > +                      info->offset + offset);
> > +        if (submap == MAP_FAILED) {
> > +            goto err;
> > +        }
> > +
> > +        len += res->iov[i].iov_len;
> > +    }
> > +    return map;
> > +err:
> > +    munmap(map, res->blob_size);
> > +    return MAP_FAILED;
> > +}
> > +
> >   static void virtio_gpu_remap_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >   {
> >       res->remapped = mmap(NULL, res->blob_size, PROT_READ,
> >                            MAP_SHARED, res->dmabuf_fd, 0);
> >       if (res->remapped == MAP_FAILED) {
> > +        res->remapped = vfio_dmabuf_mmap(res);
> > +        if (res->remapped != MAP_FAILED) {
> > +            return;
> > +        }
> >           warn_report("%s: dmabuf mmap failed: %s", __func__,
> >                       strerror(errno));
> >           res->remapped = NULL;
> > @@ -139,8 +236,12 @@ void virtio_gpu_init_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >       } else {
> >           virtio_gpu_create_udmabuf(res);
> >           if (res->dmabuf_fd < 0) {
> > -            return;
> > +            vfio_create_dmabuf(res);
> > +            if (res->dmabuf_fd < 0) {
> > +                return;
> > +            }
> >           }
> > +
> >           virtio_gpu_remap_dmabuf(res);
> >           if (!res->remapped) {
> >               return;
> > @@ -153,9 +254,7 @@ void virtio_gpu_init_dmabuf(struct
> virtio_gpu_simple_resource *res)
> >
> >   void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
> >   {
> > -    if (res->remapped) {
> > -        virtio_gpu_destroy_dmabuf(res);
> > -    }
> > +    virtio_gpu_destroy_dmabuf(res);
> >   }
> >
> >   static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf
> *dmabuf)
Re: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Akihiko Odaki 1 week, 6 days ago
On 2026/01/27 15:14, Kasireddy, Vivek wrote:
> Hi Akihiko,
> 
>> Subject: Re: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs
>> associated with VFIO devices
>>
>> On 2026/01/23 15:17, Vivek Kasireddy wrote:
>>> In addition to memfd, a blob resource can also have its backing
>>> storage in a VFIO device region. Since, there is no effective way
>>> to determine where the backing storage is located, we first try to
>>> create a dmabuf assuming it is in memfd. If that fails, we try to
>>> create a dmabuf assuming it is in VFIO device region.
>>>
>>> So, we first call virtio_gpu_create_udmabuf() to check if the blob's
>>> backing storage is located in a memfd or not. If it is not, we call
>>> vfio_create_dmabuf() which identifies the right VFIO device and
>>> eventually invokes the vfio_device_create_dmabuf_fd() API to have
>>> a dmabuf fd created.
>>>
>>> Note that in virtio_gpu_remap_dmabuf(), we first try to test if
>>> the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
>>> use the VFIO device fd directly to create the CPU mapping.
>>>
>>> While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
>>> from virtio_gpu_create_udmabuf() and also replace warn_report()
>>> with qemu_log_mask().
>>>
>>> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
>>> Cc: Alex Bennée <alex.bennee@linaro.org>
>>> Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
>>> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
>>> Cc: Alex Williamson <alex@shazbot.org>
>>> Cc: Cédric Le Goater <clg@redhat.com>
>>> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
>>> ---
>>>    hw/display/Kconfig             |   5 ++
>>>    hw/display/virtio-gpu-dmabuf.c | 117
>> ++++++++++++++++++++++++++++++---
>>>    2 files changed, 113 insertions(+), 9 deletions(-)
>>>
>>> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
>>> index 1e95ab28ef..0d090f25f5 100644
>>> --- a/hw/display/Kconfig
>>> +++ b/hw/display/Kconfig
>>> @@ -106,6 +106,11 @@ config VIRTIO_VGA
>>>        depends on VIRTIO_PCI
>>>        select VGA
>>>
>>> +config VIRTIO_GPU_VFIO_BLOB
>>> +    bool
>>> +    default y
>>> +    depends on VFIO
>>> +
>>>    config VHOST_USER_GPU
>>>        bool
>>>        default y
>>> diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-
>> dmabuf.c
>>> index c34d4c85bc..2dfe70d7eb 100644
>>> --- a/hw/display/virtio-gpu-dmabuf.c
>>> +++ b/hw/display/virtio-gpu-dmabuf.c
>>> @@ -18,6 +18,7 @@
>>>    #include "ui/console.h"
>>>    #include "hw/virtio/virtio-gpu.h"
>>>    #include "hw/virtio/virtio-gpu-pixman.h"
>>> +#include "hw/vfio/vfio-device.h"
>>>    #include "trace.h"
>>>    #include "system/ramblock.h"
>>>    #include "system/hostmem.h"
>>> @@ -27,6 +28,38 @@
>>>    #include "standard-headers/linux/udmabuf.h"
>>>    #include "standard-headers/drm/drm_fourcc.h"
>>>
>>> +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource
>> *res)
>>> +{
>>> +#if defined(VIRTIO_GPU_VFIO_BLOB)
>>> +    VFIODevice *vbasedev;
>>> +    RAMBlock *first_rb;
>>> +    ram_addr_t offset;
>>> +
>>> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
>> &offset);
>>> +    if (!first_rb) {
>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>> +                      "%s: Could not find ramblock\n", __func__);
>>> +        return;
>>> +    }
>>> +
>>> +    vbasedev = vfio_device_lookup(first_rb->mr);
>>> +    if (!vbasedev) {
>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>> +                      "%s: No VFIO device found to create dmabuf from\n",
>>> +                      __func__);
>>> +        return;
>>> +    }
>>> +
>>> +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
>>> +                                                  res->iov, res->iov_cnt);
>>> +    if (res->dmabuf_fd < 0) {
>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>> +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
>>> +                      __func__, strerror(errno));
>>> +    }
>>
>> This API design of vfio_device_create_dmabuf_fd() has some room of
>> improvements.
>>
>> First, this function requires to pass VFIODevice * which is derived from
>> res->iov[0]. Since it is getting res->iov[0] anyway, such a parameter is
>> redundant. vfio_device_create_dmabuf_fd() can call
>> vfio_device_lookup()
>> itself instead.
> Yeah, I agree. I'll move the call to lookup into vfio_device_create_dmabuf_fd().
> 
>>
>> The second problem is the error handling. The error may not be caused by
>> VFIO_DEVICE_FEATURE_DMA_BUF but may be caused by the guest
>> passing
>> regions that do not belong to one VFIO device. The two cases should be
>> distinguished for a correct message.
> Ok, I'll add separate error messages.
> 
>>
>>> +#endif
>>> +}
>>> +
>>>    static void virtio_gpu_create_udmabuf(struct
>> virtio_gpu_simple_resource *res)
>>>    {
>>>        struct udmabuf_create_list *list;
>>> @@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct
>> virtio_gpu_simple_resource *res)
>>>                         sizeof(struct udmabuf_create_item) * res->iov_cnt);
>>>
>>>        for (i = 0; i < res->iov_cnt; i++) {
>>> -        rcu_read_lock();
>>>            rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
>> &offset);
>>> -        rcu_read_unlock();
>>> -
>>>            if (!rb || rb->fd < 0) {
>>>                g_free(list);
>>>                return;
>>> @@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct
>> virtio_gpu_simple_resource *res)
>>>
>>>        res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
>>>        if (res->dmabuf_fd < 0) {
>>> -        warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
>>> -                    strerror(errno));
>>> +        qemu_log_mask(LOG_GUEST_ERROR,
>>> +                      "%s: UDMABUF_CREATE_LIST: %s\n",
>>> +                      __func__, strerror(errno));
>>>        }
>>>        g_free(list);
>>>    }
>>
>> These changes of virtio_gpu_create_udmabuf() are independent so
>> should
>> go into separate patches with explanation.
> I called these out in the commit message and included them as they were
> trivial but I guess I could move them into separate patches.

As docs/devel/submitting-a-patch.rst says,
 > A commit message that mentions "Also, ..." is often a good candidate
 > for splitting into multiple patches.

I think there will be another round of review so I suggest taking the 
chance to do so.

> 
>>
>>>
>>> +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource
>> *res)
>>> +{
>>> +    struct vfio_region_info *info = NULL;
>>> +    VFIODevice *vbasedev = NULL;
>>> +    ram_addr_t offset, len = 0;
>>> +    RAMBlock *first_rb, *rb;
>>> +    void *map, *submap;
>>> +    int i, ret = -1;
>>> +
>>> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
>> &offset);
>>> +    if (!first_rb) {
>>> +        return MAP_FAILED;
>>> +    }
>>> +#if defined(VIRTIO_GPU_VFIO_BLOB)
>>> +    vbasedev = vfio_device_lookup(first_rb->mr);
>>> +#endif
>>> +    if (!vbasedev) {
>>> +        return MAP_FAILED;
>>> +    }
>>> +
>>> +    /*
>>> +     * We first reserve a contiguous chunk of address space for the entire
>>> +     * dmabuf, then replace it with smaller mappings that correspond to
>> the
>>> +     * individual segments of the dmabuf.
>>> +     */
>>> +    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED,
>> vbasedev->fd, 0);
>>
>> It should have PROT_NONE instead of PROT_READ to prevent accidental
>> accesses to the placeholder mapping.
>> Likewise, -1 should be passed instead of vbasedev->fd so that the
>> placeholder mapping won't result in a mapping to real device.
> I am OK with both suggestions. However, I don't see any problem if the
> placeholder mapping is associated with the real device. In other words,
> what specific problem do you see if we keep vbasedev->fd instead of -1?

These two suggestions are to avoid accidents. If the code is right, 
there will be no accesses to the head of the device memory. But if there 
is something wrong with it, these two changes will act as safeguards. In 
other words, with these two changes, you will not have such an 
accidental access unless all three of the following happen:
1) The mapped page is accidentally accessed.
2) The protection is set to something other than PROT_NONE.
3) The mapping points to the device.

Each of these conditions makes accidents rarer.

> 
>>
>>> +    if (map == MAP_FAILED) {
>>> +        return map;
>>> +    }
>>> +
>>> +    for (i = 0; i < res->iov_cnt; i++) {
>>> +        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
>> &offset);
>>> +        if (rb != first_rb) {
>>> +            goto err;
>>> +        }
>>> +#if defined(VIRTIO_GPU_VFIO_BLOB)
>>> +        ret = vfio_get_region_index_from_mr(rb->mr);
>>> +        if (ret < 0) {
>>> +            goto err;
>>> +        }
>>> +
>>> +        ret = vfio_device_get_region_info(vbasedev, ret, &info);
>>
>> This function can take rb->mr and call vfio_device_lookup() and
>> vfio_get_region_index_from_mr() internally to simplify the API as
>> similar can be done for vfio_device_create_dmabuf_fd().
> Right, but vfio_device_get_region_info() is an already existing API
> that is called from various places that I am trying to leverage. I could
> create another API to wrap the calls to vfio_get_region_index_from_mr()
> and vfio_device_get_region_info() but not sure if that makes sense given
> that it would only have one caller.

vfio_device_get_region_info() should be regarded as an internal function 
of VFIO.

Considering that, the current API design that relies on 
vfio_device_lookup() to expose VFIODevice * is not optimal. We never 
want virtio-gpu or anything else outside VFIO to mess with it using 
functions declared in include/hw/vfio/vfio-device.h. Likewise, exposing 
vbasedev->fd introduces a bunch of hazards.

I think this entire function is better moved into VFIO for better 
encapsulation. I also think it should be placed close to 
vfio_device_create_dmabuf_fd(). These functions are quite similar, so 
they are better reviewed together. Any future change to either of the 
functions is also likely to require a change to the other, so placing 
them nearby will be convenient.

Regards,
Akihiko Odaki

RE: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs associated with VFIO devices
Posted by Kasireddy, Vivek 1 week, 5 days ago
Hi Akihiko,

> Subject: Re: [PATCH v4 8/8] virtio-gpu-dmabuf: Create dmabuf for blobs
> associated with VFIO devices
> >>> In addition to memfd, a blob resource can also have its backing
> >>> storage in a VFIO device region. Since, there is no effective way
> >>> to determine where the backing storage is located, we first try to
> >>> create a dmabuf assuming it is in memfd. If that fails, we try to
> >>> create a dmabuf assuming it is in VFIO device region.
> >>>
> >>> So, we first call virtio_gpu_create_udmabuf() to check if the blob's
> >>> backing storage is located in a memfd or not. If it is not, we call
> >>> vfio_create_dmabuf() which identifies the right VFIO device and
> >>> eventually invokes the vfio_device_create_dmabuf_fd() API to have
> >>> a dmabuf fd created.
> >>>
> >>> Note that in virtio_gpu_remap_dmabuf(), we first try to test if
> >>> the VFIO dmabuf exporter supports mmap or not. If it doesn't, we
> >>> use the VFIO device fd directly to create the CPU mapping.
> >>>
> >>> While at it, remove the unnecessary rcu_read_lock/rcu_read_unlock
> >>> from virtio_gpu_create_udmabuf() and also replace warn_report()
> >>> with qemu_log_mask().
> >>>
> >>> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> >>> Cc: Alex Bennée <alex.bennee@linaro.org>
> >>> Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> >>> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> >>> Cc: Alex Williamson <alex@shazbot.org>
> >>> Cc: Cédric Le Goater <clg@redhat.com>
> >>> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> >>> ---
> >>>    hw/display/Kconfig             |   5 ++
> >>>    hw/display/virtio-gpu-dmabuf.c | 117
> >> ++++++++++++++++++++++++++++++---
> >>>    2 files changed, 113 insertions(+), 9 deletions(-)
> >>>
> >>> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> >>> index 1e95ab28ef..0d090f25f5 100644
> >>> --- a/hw/display/Kconfig
> >>> +++ b/hw/display/Kconfig
> >>> @@ -106,6 +106,11 @@ config VIRTIO_VGA
> >>>        depends on VIRTIO_PCI
> >>>        select VGA
> >>>
> >>> +config VIRTIO_GPU_VFIO_BLOB
> >>> +    bool
> >>> +    default y
> >>> +    depends on VFIO
> >>> +
> >>>    config VHOST_USER_GPU
> >>>        bool
> >>>        default y
> >>> diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-
> >> dmabuf.c
> >>> index c34d4c85bc..2dfe70d7eb 100644
> >>> --- a/hw/display/virtio-gpu-dmabuf.c
> >>> +++ b/hw/display/virtio-gpu-dmabuf.c
> >>> @@ -18,6 +18,7 @@
> >>>    #include "ui/console.h"
> >>>    #include "hw/virtio/virtio-gpu.h"
> >>>    #include "hw/virtio/virtio-gpu-pixman.h"
> >>> +#include "hw/vfio/vfio-device.h"
> >>>    #include "trace.h"
> >>>    #include "system/ramblock.h"
> >>>    #include "system/hostmem.h"
> >>> @@ -27,6 +28,38 @@
> >>>    #include "standard-headers/linux/udmabuf.h"
> >>>    #include "standard-headers/drm/drm_fourcc.h"
> >>>
> >>> +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource
> >> *res)
> >>> +{
> >>> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> >>> +    VFIODevice *vbasedev;
> >>> +    RAMBlock *first_rb;
> >>> +    ram_addr_t offset;
> >>> +
> >>> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> >> &offset);
> >>> +    if (!first_rb) {
> >>> +        qemu_log_mask(LOG_GUEST_ERROR,
> >>> +                      "%s: Could not find ramblock\n", __func__);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    vbasedev = vfio_device_lookup(first_rb->mr);
> >>> +    if (!vbasedev) {
> >>> +        qemu_log_mask(LOG_GUEST_ERROR,
> >>> +                      "%s: No VFIO device found to create dmabuf from\n",
> >>> +                      __func__);
> >>> +        return;
> >>> +    }
> >>> +
> >>> +    res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> >>> +                                                  res->iov, res->iov_cnt);
> >>> +    if (res->dmabuf_fd < 0) {
> >>> +        qemu_log_mask(LOG_GUEST_ERROR,
> >>> +                      "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> >>> +                      __func__, strerror(errno));
> >>> +    }
> >>
> >> This API design of vfio_device_create_dmabuf_fd() has some room of
> >> improvements.
> >>
> >> First, this function requires to pass VFIODevice * which is derived from
> >> res->iov[0]. Since it is getting res->iov[0] anyway, such a parameter is
> >> redundant. vfio_device_create_dmabuf_fd() can call
> >> vfio_device_lookup()
> >> itself instead.
> > Yeah, I agree. I'll move the call to lookup into
> vfio_device_create_dmabuf_fd().
> >
> >>
> >> The second problem is the error handling. The error may not be caused
> by
> >> VFIO_DEVICE_FEATURE_DMA_BUF but may be caused by the guest
> >> passing
> >> regions that do not belong to one VFIO device. The two cases should
> be
> >> distinguished for a correct message.
> > Ok, I'll add separate error messages.
> >
> >>
> >>> +#endif
> >>> +}
> >>> +
> >>>    static void virtio_gpu_create_udmabuf(struct
> >> virtio_gpu_simple_resource *res)
> >>>    {
> >>>        struct udmabuf_create_list *list;
> >>> @@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct
> >> virtio_gpu_simple_resource *res)
> >>>                         sizeof(struct udmabuf_create_item) * res->iov_cnt);
> >>>
> >>>        for (i = 0; i < res->iov_cnt; i++) {
> >>> -        rcu_read_lock();
> >>>            rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
> >> &offset);
> >>> -        rcu_read_unlock();
> >>> -
> >>>            if (!rb || rb->fd < 0) {
> >>>                g_free(list);
> >>>                return;
> >>> @@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct
> >> virtio_gpu_simple_resource *res)
> >>>
> >>>        res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
> >>>        if (res->dmabuf_fd < 0) {
> >>> -        warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
> >>> -                    strerror(errno));
> >>> +        qemu_log_mask(LOG_GUEST_ERROR,
> >>> +                      "%s: UDMABUF_CREATE_LIST: %s\n",
> >>> +                      __func__, strerror(errno));
> >>>        }
> >>>        g_free(list);
> >>>    }
> >>
> >> These changes of virtio_gpu_create_udmabuf() are independent so
> >> should
> >> go into separate patches with explanation.
> > I called these out in the commit message and included them as they
> were
> > trivial but I guess I could move them into separate patches.
> 
> As docs/devel/submitting-a-patch.rst says,
>  > A commit message that mentions "Also, ..." is often a good candidate
>  > for splitting into multiple patches.
> 
> I think there will be another round of review so I suggest taking the
> chance to do so.
Ok, I'll split them into separate patches.

> 
> >
> >>
> >>>
> >>> +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource
> >> *res)
> >>> +{
> >>> +    struct vfio_region_info *info = NULL;
> >>> +    VFIODevice *vbasedev = NULL;
> >>> +    ram_addr_t offset, len = 0;
> >>> +    RAMBlock *first_rb, *rb;
> >>> +    void *map, *submap;
> >>> +    int i, ret = -1;
> >>> +
> >>> +    first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false,
> >> &offset);
> >>> +    if (!first_rb) {
> >>> +        return MAP_FAILED;
> >>> +    }
> >>> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> >>> +    vbasedev = vfio_device_lookup(first_rb->mr);
> >>> +#endif
> >>> +    if (!vbasedev) {
> >>> +        return MAP_FAILED;
> >>> +    }
> >>> +
> >>> +    /*
> >>> +     * We first reserve a contiguous chunk of address space for the
> entire
> >>> +     * dmabuf, then replace it with smaller mappings that correspond
> to
> >> the
> >>> +     * individual segments of the dmabuf.
> >>> +     */
> >>> +    map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED,
> >> vbasedev->fd, 0);
> >>
> >> It should have PROT_NONE instead of PROT_READ to prevent
> accidental
> >> accesses to the placeholder mapping.
> >> Likewise, -1 should be passed instead of vbasedev->fd so that the
> >> placeholder mapping won't result in a mapping to real device.
> > I am OK with both suggestions. However, I don't see any problem if the
> > placeholder mapping is associated with the real device. In other words,
> > what specific problem do you see if we keep vbasedev->fd instead of -1?
> 
> These two suggestions are to avoid accidents. If the code is right,
> there will be no accesses to the head of the device memory. But if there
> is something wrong with it, these two changes will be safe guards. In
> other words, with these two changes, you will not have such an
> accidental access unless all of the following three happen:
> 1) Accidentally accessing the mapped page.
> 2) The protection is set to something other than PROT_NONE.
> 3) The mapping points to the device.
> 
> Each of these conditions makes accidents rarer to occur.
Sounds good. I'll add these two suggestions.

> 
> >
> >>
> >>> +    if (map == MAP_FAILED) {
> >>> +        return map;
> >>> +    }
> >>> +
> >>> +    for (i = 0; i < res->iov_cnt; i++) {
> >>> +        rb = qemu_ram_block_from_host(res->iov[i].iov_base, false,
> >> &offset);
> >>> +        if (rb != first_rb) {
> >>> +            goto err;
> >>> +        }
> >>> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> >>> +        ret = vfio_get_region_index_from_mr(rb->mr);
> >>> +        if (ret < 0) {
> >>> +            goto err;
> >>> +        }
> >>> +
> >>> +        ret = vfio_device_get_region_info(vbasedev, ret, &info);
> >>
> >> This function can take rb->mr and call vfio_device_lookup() and
> >> vfio_get_region_index_from_mr() internally to simplify the API as
> >> similar can be done for vfio_device_create_dmabuf_fd().
> > Right, but vfio_device_get_region_info() is an already existing API
> > that is called from various places that I am trying to leverage. I could
> > create another API to wrap the calls to
> vfio_get_region_index_from_mr()
> > and vfio_device_get_region_info() but not sure if that makes sense
> given
> > that it would only have one caller.
> 
> vfio_device_get_region_info() should be regarded as an internal function
> of VFIO.
> 
> Thinking of that, the current API design that relies on
> vfio_device_lookup() to expose VFIODevice * is not optimal. We never
> want virtio-gpu or anyone other than VFIO to mess with it using functions
> declared in include/hw/vfio/vfio-device.h. Likewise, exposing
> vbasedev->fd introduces a bunch of hazards.
> 
> I think this entire function is better moved to VFIO for better
> encapsulation. I also think it should be placed close to
> vfio_device_create_dmabuf_fd(). These functions are quite similar so are
> better reviewed together. Any future event to change either of the
> function is also likely to require a change to the other, so placing
> them nearby will be convenient.
Ok, I'll try to rename and move this function to VFIO.

Thanks,
Vivek

> 
> Regards,
> Akihiko Odaki