In addition to memfd, a blob resource can also have its backing
storage in a VFIO device region. Since there is no effective way
to determine where the backing storage is located, we first try to
create a dmabuf assuming it is in a memfd. If that fails, we try to
create a dmabuf assuming it is in a VFIO device region.

So, we first call virtio_gpu_create_udmabuf() to check whether the
blob's backing storage is located in a memfd. If it is not, we call
vfio_create_dmabuf(), which identifies the right VFIO device and
eventually invokes the vfio_device_create_dmabuf_fd() API to create
a dmabuf fd.

Note that in virtio_gpu_remap_dmabuf(), we first test whether the
VFIO dmabuf exporter supports mmap. If it doesn't, we use the VFIO
device fd directly to create the CPU mapping.

While at it, remove the unnecessary rcu_read_lock()/rcu_read_unlock()
calls from virtio_gpu_create_udmabuf() and replace warn_report()
with qemu_log_mask().

Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Cc: Alex Bennée <alex.bennee@linaro.org>
Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
Cc: Alex Williamson <alex@shazbot.org>
Cc: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
hw/display/Kconfig | 5 ++
hw/display/virtio-gpu-dmabuf.c | 117 ++++++++++++++++++++++++++++++---
2 files changed, 113 insertions(+), 9 deletions(-)

diff --git a/hw/display/Kconfig b/hw/display/Kconfig
index 1e95ab28ef..0d090f25f5 100644
--- a/hw/display/Kconfig
+++ b/hw/display/Kconfig
@@ -106,6 +106,11 @@ config VIRTIO_VGA
depends on VIRTIO_PCI
select VGA

+config VIRTIO_GPU_VFIO_BLOB
+ bool
+ default y
+ depends on VFIO
+
config VHOST_USER_GPU
bool
default y
diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-dmabuf.c
index c34d4c85bc..2dfe70d7eb 100644
--- a/hw/display/virtio-gpu-dmabuf.c
+++ b/hw/display/virtio-gpu-dmabuf.c
@@ -18,6 +18,7 @@
#include "ui/console.h"
#include "hw/virtio/virtio-gpu.h"
#include "hw/virtio/virtio-gpu-pixman.h"
+#include "hw/vfio/vfio-device.h"
#include "trace.h"
#include "system/ramblock.h"
#include "system/hostmem.h"
@@ -27,6 +28,38 @@
#include "standard-headers/linux/udmabuf.h"
#include "standard-headers/drm/drm_fourcc.h"

+static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
+{
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+ VFIODevice *vbasedev;
+ RAMBlock *first_rb;
+ ram_addr_t offset;
+
+ first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
+ if (!first_rb) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: Could not find ramblock\n", __func__);
+ return;
+ }
+
+ vbasedev = vfio_device_lookup(first_rb->mr);
+ if (!vbasedev) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: No VFIO device found to create dmabuf from\n",
+ __func__);
+ return;
+ }
+
+ res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
+ res->iov, res->iov_cnt);
+ if (res->dmabuf_fd < 0) {
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
+ __func__, strerror(errno));
+ }
+#endif
+}
+
static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
{
struct udmabuf_create_list *list;
@@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
sizeof(struct udmabuf_create_item) * res->iov_cnt);

for (i = 0; i < res->iov_cnt; i++) {
- rcu_read_lock();
rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
- rcu_read_unlock();
-
if (!rb || rb->fd < 0) {
g_free(list);
return;
@@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)

res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
if (res->dmabuf_fd < 0) {
- warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
- strerror(errno));
+ qemu_log_mask(LOG_GUEST_ERROR,
+ "%s: UDMABUF_CREATE_LIST: %s\n",
+ __func__, strerror(errno));
}
g_free(list);
}

+static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)
+{
+ struct vfio_region_info *info = NULL;
+ VFIODevice *vbasedev = NULL;
+ ram_addr_t offset, len = 0;
+ RAMBlock *first_rb, *rb;
+ void *map, *submap;
+ int i, ret = -1;
+
+ first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
+ if (!first_rb) {
+ return MAP_FAILED;
+ }
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+ vbasedev = vfio_device_lookup(first_rb->mr);
+#endif
+ if (!vbasedev) {
+ return MAP_FAILED;
+ }
+
+ /*
+ * We first reserve a contiguous chunk of address space for the entire
+ * dmabuf, then replace it with smaller mappings that correspond to the
+ * individual segments of the dmabuf.
+ */
+ map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED, vbasedev->fd, 0);
+ if (map == MAP_FAILED) {
+ return map;
+ }
+
+ for (i = 0; i < res->iov_cnt; i++) {
+ rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
+ if (rb != first_rb) {
+ goto err;
+ }
+#if defined(VIRTIO_GPU_VFIO_BLOB)
+ ret = vfio_get_region_index_from_mr(rb->mr);
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = vfio_device_get_region_info(vbasedev, ret, &info);
+#endif
+ if (ret < 0 || !info) {
+ goto err;
+ }
+
+ submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
+ MAP_SHARED | MAP_FIXED, vbasedev->fd,
+ info->offset + offset);
+ if (submap == MAP_FAILED) {
+ goto err;
+ }
+
+ len += res->iov[i].iov_len;
+ }
+ return map;
+err:
+ munmap(map, res->blob_size);
+ return MAP_FAILED;
+}
+
static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource *res)
{
res->remapped = mmap(NULL, res->blob_size, PROT_READ,
MAP_SHARED, res->dmabuf_fd, 0);
if (res->remapped == MAP_FAILED) {
+ res->remapped = vfio_dmabuf_mmap(res);
+ if (res->remapped != MAP_FAILED) {
+ return;
+ }
warn_report("%s: dmabuf mmap failed: %s", __func__,
strerror(errno));
res->remapped = NULL;
@@ -139,8 +236,12 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
} else {
virtio_gpu_create_udmabuf(res);
if (res->dmabuf_fd < 0) {
- return;
+ vfio_create_dmabuf(res);
+ if (res->dmabuf_fd < 0) {
+ return;
+ }
}
+
virtio_gpu_remap_dmabuf(res);
if (!res->remapped) {
return;
@@ -153,9 +254,7 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)

void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
{
- if (res->remapped) {
- virtio_gpu_destroy_dmabuf(res);
- }
+ virtio_gpu_destroy_dmabuf(res);
}

static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)
--
2.50.1

On 2026/01/23 15:17, Vivek Kasireddy wrote:
> In addition to memfd, a blob resource can also have its backing
> storage in a VFIO device region. Since there is no effective way
> to determine where the backing storage is located, we first try to
> create a dmabuf assuming it is in a memfd. If that fails, we try to
> create a dmabuf assuming it is in a VFIO device region.
>
> So, we first call virtio_gpu_create_udmabuf() to check whether the
> blob's backing storage is located in a memfd. If it is not, we call
> vfio_create_dmabuf(), which identifies the right VFIO device and
> eventually invokes the vfio_device_create_dmabuf_fd() API to create
> a dmabuf fd.
>
> Note that in virtio_gpu_remap_dmabuf(), we first test whether the
> VFIO dmabuf exporter supports mmap. If it doesn't, we use the VFIO
> device fd directly to create the CPU mapping.
>
> While at it, remove the unnecessary rcu_read_lock()/rcu_read_unlock()
> calls from virtio_gpu_create_udmabuf() and replace warn_report()
> with qemu_log_mask().
>
> Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
> Cc: Alex Bennée <alex.bennee@linaro.org>
> Cc: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
> Cc: Dmitry Osipenko <dmitry.osipenko@collabora.com>
> Cc: Alex Williamson <alex@shazbot.org>
> Cc: Cédric Le Goater <clg@redhat.com>
> Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
> ---
> hw/display/Kconfig | 5 ++
> hw/display/virtio-gpu-dmabuf.c | 117 ++++++++++++++++++++++++++++++---
> 2 files changed, 113 insertions(+), 9 deletions(-)
>
> diff --git a/hw/display/Kconfig b/hw/display/Kconfig
> index 1e95ab28ef..0d090f25f5 100644
> --- a/hw/display/Kconfig
> +++ b/hw/display/Kconfig
> @@ -106,6 +106,11 @@ config VIRTIO_VGA
> depends on VIRTIO_PCI
> select VGA
>
> +config VIRTIO_GPU_VFIO_BLOB
> + bool
> + default y
> + depends on VFIO
> +
> config VHOST_USER_GPU
> bool
> default y
> diff --git a/hw/display/virtio-gpu-dmabuf.c b/hw/display/virtio-gpu-dmabuf.c
> index c34d4c85bc..2dfe70d7eb 100644
> --- a/hw/display/virtio-gpu-dmabuf.c
> +++ b/hw/display/virtio-gpu-dmabuf.c
> @@ -18,6 +18,7 @@
> #include "ui/console.h"
> #include "hw/virtio/virtio-gpu.h"
> #include "hw/virtio/virtio-gpu-pixman.h"
> +#include "hw/vfio/vfio-device.h"
> #include "trace.h"
> #include "system/ramblock.h"
> #include "system/hostmem.h"
> @@ -27,6 +28,38 @@
> #include "standard-headers/linux/udmabuf.h"
> #include "standard-headers/drm/drm_fourcc.h"
>
> +static void vfio_create_dmabuf(struct virtio_gpu_simple_resource *res)
> +{
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> + VFIODevice *vbasedev;
> + RAMBlock *first_rb;
> + ram_addr_t offset;
> +
> + first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> + if (!first_rb) {
> + qemu_log_mask(LOG_GUEST_ERROR,
> + "%s: Could not find ramblock\n", __func__);
> + return;
> + }
> +
> + vbasedev = vfio_device_lookup(first_rb->mr);
> + if (!vbasedev) {
> + qemu_log_mask(LOG_GUEST_ERROR,
> + "%s: No VFIO device found to create dmabuf from\n",
> + __func__);
> + return;
> + }
> +
> + res->dmabuf_fd = vfio_device_create_dmabuf_fd(vbasedev,
> + res->iov, res->iov_cnt);
> + if (res->dmabuf_fd < 0) {
> + qemu_log_mask(LOG_GUEST_ERROR,
> + "%s: VFIO_DEVICE_FEATURE_DMA_BUF: %s\n",
> + __func__, strerror(errno));
> + }
The API design of vfio_device_create_dmabuf_fd() has some room for
improvement.

First, this function requires passing a VFIODevice *, which is derived
from res->iov[0]. Since it receives res->iov anyway, such a parameter
is redundant; vfio_device_create_dmabuf_fd() can call
vfio_device_lookup() itself instead.

The second problem is the error handling. The error may not be caused
by VFIO_DEVICE_FEATURE_DMA_BUF; it may instead be caused by the guest
passing regions that do not all belong to one VFIO device. The two
cases should be distinguished so that the correct message is reported.
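
For instance, both points could be handled by letting the function
derive and validate the device itself, roughly like this (a sketch
only; the virtio_gpu_vfio_dmabuf_fd name and the -ENODEV/-EINVAL
convention are illustrative, not existing API):

static int virtio_gpu_vfio_dmabuf_fd(const struct iovec *iov,
                                     unsigned int iov_cnt)
{
    VFIODevice *vbasedev = NULL;
    ram_addr_t offset;
    unsigned int i;

    if (!iov_cnt) {
        return -EINVAL;
    }

    for (i = 0; i < iov_cnt; i++) {
        RAMBlock *rb = qemu_ram_block_from_host(iov[i].iov_base, false,
                                                &offset);
        VFIODevice *dev = rb ? vfio_device_lookup(rb->mr) : NULL;

        if (!dev) {
            return -ENODEV;  /* backing storage is not a VFIO region */
        }
        if (vbasedev && dev != vbasedev) {
            return -EINVAL;  /* regions span more than one VFIO device */
        }
        vbasedev = dev;
    }

    /* Only a failure here can be blamed on VFIO_DEVICE_FEATURE_DMA_BUF. */
    return vfio_device_create_dmabuf_fd(vbasedev, iov, iov_cnt);
}

The caller can then treat -ENODEV as "not VFIO-backed", -EINVAL as a
guest error, and anything else as a genuine ioctl failure.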
> +#endif
> +}
> +
> static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
> {
> struct udmabuf_create_list *list;
> @@ -43,10 +76,7 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
> sizeof(struct udmabuf_create_item) * res->iov_cnt);
>
> for (i = 0; i < res->iov_cnt; i++) {
> - rcu_read_lock();
> rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> - rcu_read_unlock();
> -
> if (!rb || rb->fd < 0) {
> g_free(list);
> return;
> @@ -62,17 +92,84 @@ static void virtio_gpu_create_udmabuf(struct virtio_gpu_simple_resource *res)
>
> res->dmabuf_fd = ioctl(udmabuf, UDMABUF_CREATE_LIST, list);
> if (res->dmabuf_fd < 0) {
> - warn_report("%s: UDMABUF_CREATE_LIST: %s", __func__,
> - strerror(errno));
> + qemu_log_mask(LOG_GUEST_ERROR,
> + "%s: UDMABUF_CREATE_LIST: %s\n",
> + __func__, strerror(errno));
> }
> g_free(list);
> }
These changes to virtio_gpu_create_udmabuf() are independent, so they
should go into separate patches with their own explanations.
>
> +static void *vfio_dmabuf_mmap(struct virtio_gpu_simple_resource *res)
> +{
> + struct vfio_region_info *info = NULL;
> + VFIODevice *vbasedev = NULL;
> + ram_addr_t offset, len = 0;
> + RAMBlock *first_rb, *rb;
> + void *map, *submap;
> + int i, ret = -1;
> +
> + first_rb = qemu_ram_block_from_host(res->iov[0].iov_base, false, &offset);
> + if (!first_rb) {
> + return MAP_FAILED;
> + }
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> + vbasedev = vfio_device_lookup(first_rb->mr);
> +#endif
> + if (!vbasedev) {
> + return MAP_FAILED;
> + }
> +
> + /*
> + * We first reserve a contiguous chunk of address space for the entire
> + * dmabuf, then replace it with smaller mappings that correspond to the
> + * individual segments of the dmabuf.
> + */
> + map = mmap(NULL, res->blob_size, PROT_READ, MAP_SHARED, vbasedev->fd, 0);
It should have PROT_NONE instead of PROT_READ to prevent accidental
accesses to the placeholder mapping.

Likewise, -1 should be passed instead of vbasedev->fd so that the
placeholder mapping won't result in a mapping to the real device.
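
In other words (a minimal sketch; with no fd, the reservation also
needs MAP_PRIVATE | MAP_ANONYMOUS):

    /* Reserve address space only; nothing readable, no device backing. */
    map = mmap(NULL, res->blob_size, PROT_NONE,
               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

The later per-segment mmap() calls with MAP_FIXED then replace pieces
of this reservation with readable device mappings, so PROT_NONE only
ever covers the ranges that have not been populated yet.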
> + if (map == MAP_FAILED) {
> + return map;
> + }
> +
> + for (i = 0; i < res->iov_cnt; i++) {
> + rb = qemu_ram_block_from_host(res->iov[i].iov_base, false, &offset);
> + if (rb != first_rb) {
> + goto err;
> + }
> +#if defined(VIRTIO_GPU_VFIO_BLOB)
> + ret = vfio_get_region_index_from_mr(rb->mr);
> + if (ret < 0) {
> + goto err;
> + }
> +
> + ret = vfio_device_get_region_info(vbasedev, ret, &info);
This function could take rb->mr and call vfio_device_lookup() and
vfio_get_region_index_from_mr() internally to simplify the API,
similar to what can be done for vfio_device_create_dmabuf_fd().
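
i.e., a wrapper roughly like this (the name is hypothetical):

static int vfio_device_get_region_info_from_mr(MemoryRegion *mr,
                                               struct vfio_region_info **info)
{
    VFIODevice *vbasedev = vfio_device_lookup(mr);
    int index;

    if (!vbasedev) {
        return -ENODEV;
    }

    index = vfio_get_region_index_from_mr(mr);
    if (index < 0) {
        return index;
    }

    return vfio_device_get_region_info(vbasedev, index, info);
}

so that virtio-gpu does not need to know about region indices at all.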
> +#endif
> + if (ret < 0 || !info) {
> + goto err;
> + }
> +
> + submap = mmap(map + len, res->iov[i].iov_len, PROT_READ,
> + MAP_SHARED | MAP_FIXED, vbasedev->fd,
> + info->offset + offset);
> + if (submap == MAP_FAILED) {
> + goto err;
> + }
> +
> + len += res->iov[i].iov_len;
> + }
> + return map;
> +err:
> + munmap(map, res->blob_size);
> + return MAP_FAILED;
> +}
> +
> static void virtio_gpu_remap_dmabuf(struct virtio_gpu_simple_resource *res)
> {
> res->remapped = mmap(NULL, res->blob_size, PROT_READ,
> MAP_SHARED, res->dmabuf_fd, 0);
> if (res->remapped == MAP_FAILED) {
> + res->remapped = vfio_dmabuf_mmap(res);
> + if (res->remapped != MAP_FAILED) {
> + return;
> + }
> warn_report("%s: dmabuf mmap failed: %s", __func__,
> strerror(errno));
> res->remapped = NULL;
> @@ -139,8 +236,12 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
> } else {
> virtio_gpu_create_udmabuf(res);
> if (res->dmabuf_fd < 0) {
> - return;
> + vfio_create_dmabuf(res);
> + if (res->dmabuf_fd < 0) {
> + return;
> + }
> }
> +
> virtio_gpu_remap_dmabuf(res);
> if (!res->remapped) {
> return;
> @@ -153,9 +254,7 @@ void virtio_gpu_init_dmabuf(struct virtio_gpu_simple_resource *res)
>
> void virtio_gpu_fini_dmabuf(struct virtio_gpu_simple_resource *res)
> {
> - if (res->remapped) {
> - virtio_gpu_destroy_dmabuf(res);
> - }
> + virtio_gpu_destroy_dmabuf(res);
> }
>
> static void virtio_gpu_free_dmabuf(VirtIOGPU *g, VGPUDMABuf *dmabuf)