In order to create a dmabuf associated with a buffer that spans
multiple ranges, we first need to identify the VFIO region and
index the buffer (represented by iovec) belongs to and then
translate its addresses to offsets within that region.
The qemu_ram_block_from_host() API gives us both the region and
the offset info we need to populate the dma ranges so that we can
invoke the VFIO_DEVICE_FEATURE_DMA_BUF feature to create the dmabuf.
Also, instead of iterating over all QOM devices to find the
VFIODevice associated with a memory region, introduce a helper to
just use the vfio_device_list to lookup the VFIODevice.
Lastly, introduce an enum to return the type of error encountered
while creating the dmabuf fd.
Cc: Alex Williamson <alex@shazbot.org>
Cc: Cédric Le Goater <clg@redhat.com>
Signed-off-by: Vivek Kasireddy <vivek.kasireddy@intel.com>
---
hw/vfio/device.c | 84 +++++++++++++++++++++++++++++++++++
hw/vfio/dmabuf-stubs.c | 17 +++++++
hw/vfio/meson.build | 1 +
include/hw/vfio/vfio-device.h | 22 +++++++++
4 files changed, 124 insertions(+)
create mode 100644 hw/vfio/dmabuf-stubs.c
diff --git a/hw/vfio/device.c b/hw/vfio/device.c
index 973fc35b59..a25ecc3697 100644
--- a/hw/vfio/device.c
+++ b/hw/vfio/device.c
@@ -21,6 +21,7 @@
#include "qemu/osdep.h"
#include <sys/ioctl.h>
+#include "system/ramblock.h"
#include "hw/vfio/vfio-device.h"
#include "hw/vfio/pci.h"
#include "hw/core/iommu.h"
@@ -644,3 +645,86 @@ static VFIODeviceIOOps vfio_device_io_ops_ioctl = {
.region_read = vfio_device_io_region_read,
.region_write = vfio_device_io_region_write,
};
+
+static bool vfio_device_lookup(struct iovec *iov, VFIODevice **vbasedevp,
+ Error **errp)
+{
+ VFIODevice *vbasedev;
+ RAMBlock *first_rb;
+ ram_addr_t offset;
+
+ first_rb = qemu_ram_block_from_host(iov[0].iov_base, false, &offset);
+ if (!first_rb) {
+ error_setg(errp, "Could not find first ramblock\n");
+ return false;
+ }
+
+ QLIST_FOREACH(vbasedev, &vfio_device_list, next) {
+ if (vbasedev->dev == first_rb->mr->dev) {
+ *vbasedevp = vbasedev;
+ return true;
+ }
+ }
+ error_setg(errp, "No VFIO device found to create dmabuf from\n");
+ return false;
+}
+
+int vfio_device_create_dmabuf_fd(struct iovec *iov, unsigned int iov_cnt,
+ Error **errp)
+{
+ g_autofree struct vfio_device_feature *feature = NULL;
+ struct vfio_device_feature_dma_buf *dma_buf;
+ RAMBlock *rb, *first_rb;
+ VFIODevice *vbasedev;
+ ram_addr_t offset;
+ int i, index, ret;
+ size_t argsz;
+
+ argsz = sizeof(*feature) + sizeof (*dma_buf) +
+ sizeof(struct vfio_region_dma_range) * iov_cnt;
+ feature = g_malloc0(argsz);
+ *feature = (struct vfio_device_feature) {
+ .argsz = argsz,
+ .flags = VFIO_DEVICE_FEATURE_GET | VFIO_DEVICE_FEATURE_DMA_BUF,
+ };
+ dma_buf = (struct vfio_device_feature_dma_buf *)feature->data;
+
+ if (!vfio_device_lookup(iov, &vbasedev, errp)) {
+ return VFIO_DMABUF_CREATE_GUEST_ERROR;
+ }
+
+ for (i = 0; i < iov_cnt; i++) {
+ rb = qemu_ram_block_from_host(iov[i].iov_base, false, &offset);
+ if (i == 0) {
+ first_rb = rb;
+ }
+ if (!rb || rb != first_rb) {
+ error_setg(errp, "Cannot create dmabuf with different regions\n");
+ return VFIO_DMABUF_CREATE_GUEST_ERROR;
+ }
+
+ index = vfio_get_region_index_from_mr(rb->mr);
+ if (index < 0) {
+ error_setg(errp, "Cannot find region index for dmabuf segment\n");
+ return VFIO_DMABUF_CREATE_GUEST_ERROR;
+ }
+
+ dma_buf->region_index = index;
+ dma_buf->dma_ranges[i].offset = offset;
+ dma_buf->dma_ranges[i].length = iov[i].iov_len;
+ }
+
+ dma_buf->nr_ranges = iov_cnt;
+ dma_buf->open_flags = O_RDONLY | O_CLOEXEC;
+
+ ret = vfio_device_get_feature(vbasedev, feature);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "Could not create dmabuf fd via VFIO device");
+ if (ret == -ENODEV || ret == -EINVAL) {
+ return VFIO_DMABUF_CREATE_GUEST_ERROR;
+ }
+ return VFIO_DMABUF_CREATE_HOST_ERROR;
+ }
+ return ret;
+}
diff --git a/hw/vfio/dmabuf-stubs.c b/hw/vfio/dmabuf-stubs.c
new file mode 100644
index 0000000000..e71e2b68dd
--- /dev/null
+++ b/hw/vfio/dmabuf-stubs.c
@@ -0,0 +1,17 @@
+/*
+ * Copyright (c) 2026 Intel and/or its affiliates.
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/iov.h"
+#include "hw/vfio/vfio-device.h"
+
+int vfio_device_create_dmabuf_fd(struct iovec *iov, unsigned int iov_cnt,
+ Error **errp)
+{
+ error_setg(errp, "VFIO dmabuf support is not enabled");
+ return VFIO_DMABUF_CREATE_HOST_ERROR;
+}
diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
index 82f68698fb..ac899d30a8 100644
--- a/hw/vfio/meson.build
+++ b/hw/vfio/meson.build
@@ -34,3 +34,4 @@ system_ss.add(when: 'CONFIG_IOMMUFD', if_false: files('iommufd-stubs.c'))
system_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
'display.c',
))
+system_ss.add(when: 'CONFIG_VFIO', if_false: files('dmabuf-stubs.c'))
diff --git a/include/hw/vfio/vfio-device.h b/include/hw/vfio/vfio-device.h
index 596c7f5a10..9dcf3ea57d 100644
--- a/include/hw/vfio/vfio-device.h
+++ b/include/hw/vfio/vfio-device.h
@@ -41,6 +41,13 @@ enum {
VFIO_DEVICE_TYPE_AP = 3,
};
+enum {
+ /* Guest is responsible for this error */
+ VFIO_DMABUF_CREATE_GUEST_ERROR = -1,
+ /* Host is at fault for this error */
+ VFIO_DMABUF_CREATE_HOST_ERROR = -2,
+};
+
typedef struct VFIODeviceOps VFIODeviceOps;
typedef struct VFIODeviceIOOps VFIODeviceIOOps;
typedef struct VFIOMigration VFIOMigration;
@@ -318,6 +325,21 @@ int vfio_device_get_irq_info(VFIODevice *vbasedev, int index,
* Returns the region index or -1 on error.
*/
int vfio_get_region_index_from_mr(MemoryRegion *mr);
+
+/**
+ * Create a dmabuf fd by first translating the addresses in the
+ * iovec array into VFIO region offsets and then invoking the
+ * VFIO_DEVICE_FEATURE_DMA_BUF feature.
+ *
+ * @iov: array of iovec entries associated with a buffer
+ * @iov_cnt: number of entries in the iovec array
+ * @errp: pointer to Error*, to store an error if it happens.
+ *
+ * Returns the created dmabuf fd or either VFIO_DMABUF_CREATE_GUEST_ERROR
+ * or VFIO_DMABUF_CREATE_HOST_ERROR on error.
+ */
+int vfio_device_create_dmabuf_fd(struct iovec *iov, unsigned int iov_cnt,
+ Error **errp);
#endif
/* Returns 0 on success, or a negative errno. */
--
2.53.0