From: Nathan Chen <nathanc@nvidia.com>
Open VFIO FDs from libvirt backend without exposing
these FDs to XML users, i.e. one per iommufd hostdev
for /dev/vfio/devices/vfioX, and pass the FD to qemu
command line.
Suggested-by: Ján Tomko <jtomko@redhat.com>
Signed-off-by: Nathan Chen <nathanc@nvidia.com>
---
src/libvirt_private.syms | 1 +
src/qemu/qemu_command.c | 21 +++++++++++
src/qemu/qemu_process.c | 79 ++++++++++++++++++++++++++++++++++++++++
src/util/virpci.c | 42 +++++++++++++++++++++
src/util/virpci.h | 2 +
5 files changed, 145 insertions(+)
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index a8eadbfb8a..0904265459 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -3163,6 +3163,7 @@ virPCIDeviceGetStubDriverName;
virPCIDeviceGetStubDriverType;
virPCIDeviceGetUnbindFromStub;
virPCIDeviceGetUsedBy;
+virPCIDeviceGetVfioPath;
virPCIDeviceGetVPD;
virPCIDeviceHasPCIExpressLink;
virPCIDeviceIsAssignable;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 98e4469c25..2a16f9df63 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4809,6 +4809,18 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
NULL) < 0)
return NULL;
+ if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
+ pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) {
+ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(dev);
+
+ if (hostdevPriv->vfioDeviceFd != -1) {
+ g_autofree char *fdstr = g_strdup_printf("%d", hostdevPriv->vfioDeviceFd);
+ if (virJSONValueObjectAdd(&props, "S:fd", fdstr, NULL) < 0)
+ return NULL;
+ hostdevPriv->vfioDeviceFd = -1;
+ }
+ }
+
if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0)
return NULL;
@@ -5253,6 +5265,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
return -1;
+ if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
+ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
+
+ if (hostdevPriv->vfioDeviceFd != -1) {
+ virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd,
+ VIR_COMMAND_PASS_FD_CLOSE_PARENT);
+ }
+ }
+
if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev)))
return -1;
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 0e50cd1ccc..ab88a6bf62 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -103,6 +103,7 @@
#include "storage_source.h"
#include "backup_conf.h"
#include "storage_file_probe.h"
+#include "virpci.h"
#include "logging/log_manager.h"
#include "logging/log_protocol.h"
@@ -8181,6 +8182,9 @@ qemuProcessLaunch(virConnectPtr conn,
if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0)
goto cleanup;
+ if (qemuProcessOpenVfioFds(vm) < 0)
+ goto cleanup;
+
if (!(cmd = qemuBuildCommandLine(vm,
incoming ? "defer" : NULL,
vmop,
@@ -10360,3 +10364,78 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit,
qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit);
virObjectUnlock(vm);
}
+
+/**
+ * qemuProcessOpenVfioDeviceFd:
+ * @hostdev: host device definition
+ * @vfioFd: returned file descriptor
+ *
+ * Opens the VFIO device file descriptor for a hostdev.
+ *
+ * Returns: FD on success, -1 on failure
+ */
+static int
+qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev)
+{
+ g_autofree char *vfioPath = NULL;
+ int fd = -1;
+
+ if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
+ hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
+ virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+ _("VFIO FD only supported for PCI hostdevs"));
+ return -1;
+ }
+
+ if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0)
+ return -1;
+
+ VIR_DEBUG("Opening VFIO device %s", vfioPath);
+
+ if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) {
+ if (errno == ENOENT) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+ _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"),
+ vfioPath);
+ } else {
+ virReportSystemError(errno,
+ _("cannot open VFIO device %1$s"), vfioPath);
+ }
+ return -1;
+ }
+
+ VIR_DEBUG("Opened VFIO device FD %d for %s", fd, vfioPath);
+ return fd;
+}
+
+/**
+ * qemuProcessOpenVfioFds:
+ * @vm: domain object
+ *
+ * Opens all necessary VFIO file descriptors for the domain.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+int
+qemuProcessOpenVfioFds(virDomainObj *vm)
+{
+ size_t i;
+
+ /* Check if we have any hostdevs that need VFIO FDs */
+ for (i = 0; i < vm->def->nhostdevs; i++) {
+ virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
+ qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
+
+ if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
+ hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI &&
+ hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
+ hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
+ /* Open VFIO device FD */
+ hostdevPriv->vfioDeviceFd = qemuProcessOpenVfioDeviceFd(hostdev);
+ if (hostdevPriv->vfioDeviceFd == -1)
+ return -1;
+ }
+ }
+
+ return 0;
+}
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 90617e69c6..886e2c55e6 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -3320,3 +3320,45 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address)
{
g_free(address);
}
+
+/**
+ * virPCIDeviceGetVfioPath:
+ * @addr: host device PCI address
+ * @vfioPath: returned VFIO device path
+ *
+ * Constructs the VFIO device path for a PCI hostdev.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+int
+virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr,
+ char **vfioPath)
+{
+ g_autofree char *addrStr = NULL;
+
+ *vfioPath = NULL;
+ addrStr = virPCIDeviceAddressAsString(addr);
+
+ /* First try: Direct lookup in device's vfio-dev subdirectory */
+ {
+ g_autofree char *sysfsPath = NULL;
+ g_autoptr(DIR) dir = NULL;
+ struct dirent *entry = NULL;
+
+ sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr);
+
+ if (virDirOpen(&dir, sysfsPath) == 1) {
+ while (virDirRead(dir, &entry, sysfsPath) > 0) {
+ if (STRPREFIX(entry->d_name, "vfio")) {
+ *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name);
+ return 0;
+ }
+ }
+ }
+ }
+
+ virReportError(VIR_ERR_INTERNAL_ERROR,
+ _("cannot find VFIO device for PCI device %1$s"),
+ addrStr);
+ return -1;
+}
diff --git a/src/util/virpci.h b/src/util/virpci.h
index fc538566e1..24ede10755 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -296,6 +296,8 @@ void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev);
void virPCIDeviceAddressFree(virPCIDeviceAddress *address);
+int virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, char **vfioPath);
+
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree);
G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree);
--
2.43.0
© 2016 - 2026 Red Hat, Inc.