[PATCH v2 2/5] qemu: open VFIO FDs from libvirt backend

Nathan Chen via Devel posted 5 patches 2 weeks, 6 days ago
[PATCH v2 2/5] qemu: open VFIO FDs from libvirt backend
Posted by Nathan Chen via Devel 2 weeks, 6 days ago
Open VFIO FDs from libvirt backend without exposing
these FDs to XML users, i.e. one per iommufd hostdev
for /dev/vfio/devices/vfioX, and pass the FD to qemu
command line.

Signed-off-by: Nathan Chen <nathanc@nvidia.com>
---
 src/conf/domain_conf.h   |   2 +
 src/libvirt_private.syms |   1 +
 src/qemu/qemu_command.c  |  26 ++++++++
 src/qemu/qemu_domain.c   |  39 ++++++++++++
 src/qemu/qemu_domain.h   |  17 +++++
 src/qemu/qemu_process.c  | 130 +++++++++++++++++++++++++++++++++++++++
 src/util/virpci.c        |  69 +++++++++++++++++++++
 src/util/virpci.h        |   2 +
 8 files changed, 286 insertions(+)

diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index 4fd8342950..da4ce9fc86 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -364,6 +364,8 @@ struct _virDomainHostdevDef {
      */
     virDomainNetDef *parentnet;
 
+    virObject *privateData;
+
     virDomainHostdevMode mode;
     virDomainStartupPolicy startupPolicy;
     bool managed;
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 4e57e4a8f6..ed2b0d381e 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -3159,6 +3159,7 @@ virPCIDeviceGetStubDriverName;
 virPCIDeviceGetStubDriverType;
 virPCIDeviceGetUnbindFromStub;
 virPCIDeviceGetUsedBy;
+virPCIDeviceGetVfioPath;
 virPCIDeviceGetVPD;
 virPCIDeviceHasPCIExpressLink;
 virPCIDeviceIsAssignable;
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 95d1c2ee98..9b08f66175 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4756,6 +4756,12 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
     const char *iommufdId = NULL;
     /* 'ramfb' property must be omitted unless it's to be enabled */
     bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
+    bool useIommufd = false;
+
+    if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
+        pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) {
+        useIommufd = true;
+    }
 
     /* caller has to assign proper passthrough driver name */
     switch (pcisrc->driver.name) {
@@ -4802,6 +4808,17 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
                               NULL) < 0)
         return NULL;
 
+    if (useIommufd && dev->privateData) {
+        qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(dev);
+
+        if (hostdevPriv->vfioDeviceFd >= 0) {
+            if (virJSONValueObjectAdd(&props,
+                                      "S:fd", g_strdup_printf("%d", hostdevPriv->vfioDeviceFd),
+                                      NULL) < 0)
+                return NULL;
+        }
+    }
+
     if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0)
         return NULL;
 
@@ -5260,6 +5277,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
             if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
                 return -1;
 
+            if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
+                qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
+
+                if (hostdevPriv && hostdevPriv->vfioDeviceFd >= 0) {
+                    virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd,
+                                     VIR_COMMAND_PASS_FD_CLOSE_PARENT);
+                }
+            }
+
             if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev)))
                 return -1;
 
diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index ac56fc7cb4..7601bdbb2b 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -1238,6 +1238,45 @@ qemuDomainNetworkPrivateFormat(const virDomainNetDef *net,
 }
 
 
+static virClass *qemuDomainHostdevPrivateClass;
+
+static void
+qemuDomainHostdevPrivateDispose(void *obj)
+{
+    qemuDomainHostdevPrivate *priv = obj;
+
+    VIR_FORCE_CLOSE(priv->vfioDeviceFd);
+}
+
+
+static int
+qemuDomainHostdevPrivateOnceInit(void)
+{
+    if (!VIR_CLASS_NEW(qemuDomainHostdevPrivate, virClassForObject()))
+        return -1;
+
+    return 0;
+}
+
+VIR_ONCE_GLOBAL_INIT(qemuDomainHostdevPrivate);
+
+virObject *
+qemuDomainHostdevPrivateNew(void)
+{
+    qemuDomainHostdevPrivate *priv;
+
+    if (qemuDomainHostdevPrivateInitialize() < 0)
+        return NULL;
+
+    if (!(priv = virObjectNew(qemuDomainHostdevPrivateClass)))
+        return NULL;
+
+    priv->vfioDeviceFd = -1;
+
+    return (virObject *) priv;
+}
+
+
 /* qemuDomainSecretInfoSetup:
  * @priv: pointer to domain private object
  * @alias: alias of the secret
diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
index 3396f929fd..4736f1ede5 100644
--- a/src/qemu/qemu_domain.h
+++ b/src/qemu/qemu_domain.h
@@ -461,6 +461,17 @@ struct _qemuDomainTPMPrivate {
 };
 
 
+#define QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev) \
+    ((qemuDomainHostdevPrivate *) (hostdev)->privateData)
+
+typedef struct _qemuDomainHostdevPrivate qemuDomainHostdevPrivate;
+struct _qemuDomainHostdevPrivate {
+    virObject parent;
+
+    /* VFIO device file descriptor for iommufd passthrough */
+    int vfioDeviceFd;
+};
+
 void
 qemuDomainNetworkPrivateClearFDs(qemuDomainNetworkPrivate *priv);
 
@@ -1174,3 +1185,9 @@ qemuDomainCheckCPU(virArch arch,
 bool
 qemuDomainMachineSupportsFloppy(const char *machine,
                                 virQEMUCaps *qemuCaps);
+
+virObject *
+qemuDomainHostdevPrivateNew(void);
+
+int qemuProcessOpenVfioFds(virDomainObj *vm);
+void qemuProcessCloseVfioFds(virDomainObj *vm);
diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 45fc32a663..bf245ee8af 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -106,6 +106,7 @@
 
 #include "logging/log_manager.h"
 #include "logging/log_protocol.h"
+#include "util/virpci.h"
 
 #define VIR_FROM_THIS VIR_FROM_QEMU
 
@@ -8091,6 +8092,9 @@ qemuProcessLaunch(virConnectPtr conn,
     if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0)
         goto cleanup;
 
+    if (qemuProcessOpenVfioFds(vm) < 0)
+        goto cleanup;
+
     if (!(cmd = qemuBuildCommandLine(vm,
                                      incoming ? "defer" : NULL,
                                      vmop,
@@ -10267,3 +10271,129 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit,
     qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit);
     virObjectUnlock(vm);
 }
+
+/**
+ * qemuProcessOpenVfioDeviceFd:
+ * @hostdev: host device definition
+ * @vfioFd: returned file descriptor
+ *
+ * Opens the VFIO device file descriptor for a hostdev.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+static int
+qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev,
+                            int *vfioFd)
+{
+    g_autofree char *vfioPath = NULL;
+    int fd = -1;
+
+
+    if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
+        hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+                       _("VFIO FD only supported for PCI hostdevs"));
+        return -1;
+    }
+
+    if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0)
+        return -1;
+
+    VIR_DEBUG("Opening VFIO device %s", vfioPath);
+
+    if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) {
+        if (errno == ENOENT) {
+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+                           _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"),
+                           vfioPath);
+        } else {
+            virReportSystemError(errno,
+                                 _("cannot open VFIO device %1$s"), vfioPath);
+        }
+        return -1;
+    }
+
+    *vfioFd = fd;
+    VIR_DEBUG("Opened VFIO device FD %d for %s", *vfioFd, vfioPath);
+    return 0;
+}
+
+/**
+ * qemuProcessOpenVfioFds:
+ * @vm: domain object
+ *
+ * Opens all necessary VFIO file descriptors for the domain.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+int
+qemuProcessOpenVfioFds(virDomainObj *vm)
+{
+    size_t i;
+
+    /* Check if we have any hostdevs that need VFIO FDs */
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
+        qemuDomainHostdevPrivate *hostdevPriv = NULL;
+
+        if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
+            hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
+
+            if (hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
+                hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
+
+                if (!hostdev->privateData) {
+                    if (!(hostdev->privateData = qemuDomainHostdevPrivateNew()))
+                        goto error;
+                }
+
+                hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
+
+                /* Open VFIO device FD */
+                if (qemuProcessOpenVfioDeviceFd(hostdev, &hostdevPriv->vfioDeviceFd) < 0)
+                    goto error;
+
+                VIR_DEBUG("Stored VFIO FD %d in hostdev %04x:%02x:%02x.%d private data",
+                         hostdevPriv->vfioDeviceFd,
+                         hostdev->source.subsys.u.pci.addr.domain,
+                         hostdev->source.subsys.u.pci.addr.bus,
+                         hostdev->source.subsys.u.pci.addr.slot,
+                         hostdev->source.subsys.u.pci.addr.function);
+            }
+        }
+    }
+
+    return 0;
+
+ error:
+    qemuProcessCloseVfioFds(vm);
+    return -1;
+}
+
+/**
+ * qemuProcessCloseVfioFds:
+ * @vm: domain object
+ *
+ * Closes all VFIO file descriptors for the domain.
+ */
+void
+qemuProcessCloseVfioFds(virDomainObj *vm)
+{
+    size_t i;
+
+    /* Close all VFIO device FDs */
+    for (i = 0; i < vm->def->nhostdevs; i++) {
+        virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
+        qemuDomainHostdevPrivate *hostdevPriv;
+
+        if (!hostdev->privateData)
+            continue;
+
+        hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
+
+        if (hostdevPriv->vfioDeviceFd >= 0) {
+            VIR_DEBUG("Closing VFIO device FD %d", hostdevPriv->vfioDeviceFd);
+            VIR_FORCE_CLOSE(hostdevPriv->vfioDeviceFd);
+        }
+    }
+}
diff --git a/src/util/virpci.c b/src/util/virpci.c
index 90617e69c6..da62ece0f6 100644
--- a/src/util/virpci.c
+++ b/src/util/virpci.c
@@ -3320,3 +3320,72 @@ virPCIDeviceAddressFree(virPCIDeviceAddress *address)
 {
     g_free(address);
 }
+
+/**
+ * virPCIDeviceGetVfioPath:
+ * @addr: host device PCI address
+ * @vfioPath: returned VFIO device path
+ *
+ * Constructs the VFIO device path for a PCI hostdev.
+ *
+ * Returns: 0 on success, -1 on failure
+ */
+int
+virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr,
+                        char **vfioPath)
+{
+    g_autofree char *addrStr = NULL;
+
+    *vfioPath = NULL;
+    addrStr = virPCIDeviceAddressAsString(addr);
+
+    /* First try: Direct lookup in device's vfio-dev subdirectory */
+    {
+        g_autofree char *sysfsPath = NULL;
+        g_autoptr(DIR) dir = NULL;
+        struct dirent *entry = NULL;
+
+        sysfsPath = g_strdup_printf("/sys/bus/pci/devices/%s/vfio-dev/", addrStr);
+
+        if (virDirOpen(&dir, sysfsPath) == 1) {
+            while (virDirRead(dir, &entry, sysfsPath) > 0) {
+                if (STRPREFIX(entry->d_name, "vfio")) {
+                    *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name);
+                    return 0;
+                }
+            }
+        }
+    }
+
+    /* Second try: Scan /sys/class/vfio-dev */
+    {
+        g_autofree char *sysfsPath = g_strdup("/sys/class/vfio-dev");
+        g_autoptr(DIR) dir = NULL;
+        struct dirent *entry = NULL;
+
+        if (virDirOpen(&dir, sysfsPath) == 1) {
+            while (virDirRead(dir, &entry, sysfsPath) > 0) {
+                g_autofree char *devLink = NULL;
+                g_autofree char *target = NULL;
+
+                if (!STRPREFIX(entry->d_name, "vfio"))
+                    continue;
+
+                devLink = g_strdup_printf("/sys/class/vfio-dev/%s/device", entry->d_name);
+
+                if (virFileResolveLink(devLink, &target) < 0)
+                    continue;
+
+                if (strstr(target, addrStr)) {
+                    *vfioPath = g_strdup_printf("/dev/vfio/devices/%s", entry->d_name);
+                    return 0;
+                }
+            }
+        }
+    }
+
+    virReportError(VIR_ERR_INTERNAL_ERROR,
+                  _("cannot find VFIO device for PCI device %1$s"),
+                  addrStr);
+    return -1;
+}
diff --git a/src/util/virpci.h b/src/util/virpci.h
index fc538566e1..24ede10755 100644
--- a/src/util/virpci.h
+++ b/src/util/virpci.h
@@ -296,6 +296,8 @@ void virPCIEDeviceInfoFree(virPCIEDeviceInfo *dev);
 
 void virPCIDeviceAddressFree(virPCIDeviceAddress *address);
 
+int virPCIDeviceGetVfioPath(virPCIDeviceAddress *addr, char **vfioPath);
+
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDevice, virPCIDeviceFree);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIDeviceAddress, virPCIDeviceAddressFree);
 G_DEFINE_AUTOPTR_CLEANUP_FUNC(virPCIEDeviceInfo, virPCIEDeviceInfoFree);
-- 
2.43.0
Re: [PATCH v2 2/5] qemu: open VFIO FDs from libvirt backend
Posted by Ján Tomko via Devel 3 days, 18 hours ago
On a Friday in 2025, Nathan Chen via Devel wrote:
>Open VFIO FDs from libvirt backend without exposing
>these FDs to XML users, i.e. one per iommufd hostdev
>for /dev/vfio/devices/vfioX, and pass the FD to qemu
>command line.
>
>Signed-off-by: Nathan Chen <nathanc@nvidia.com>
>---
> src/conf/domain_conf.h   |   2 +
> src/libvirt_private.syms |   1 +
> src/qemu/qemu_command.c  |  26 ++++++++
> src/qemu/qemu_domain.c   |  39 ++++++++++++
> src/qemu/qemu_domain.h   |  17 +++++
> src/qemu/qemu_process.c  | 130 +++++++++++++++++++++++++++++++++++++++
> src/util/virpci.c        |  69 +++++++++++++++++++++
> src/util/virpci.h        |   2 +
> 8 files changed, 286 insertions(+)
>
>diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
>index 4fd8342950..da4ce9fc86 100644
>--- a/src/conf/domain_conf.h
>+++ b/src/conf/domain_conf.h
>@@ -364,6 +364,8 @@ struct _virDomainHostdevDef {
>      */
>     virDomainNetDef *parentnet;
>
>+    virObject *privateData;
>+

The code adding the privateData should be in a separate patch.
(done in my gitlab branch)

>     virDomainHostdevMode mode;
>     virDomainStartupPolicy startupPolicy;
>     bool managed;
>diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
>index 4e57e4a8f6..ed2b0d381e 100644
>--- a/src/libvirt_private.syms
>+++ b/src/libvirt_private.syms
>@@ -3159,6 +3159,7 @@ virPCIDeviceGetStubDriverName;
> virPCIDeviceGetStubDriverType;
> virPCIDeviceGetUnbindFromStub;
> virPCIDeviceGetUsedBy;
>+virPCIDeviceGetVfioPath;
> virPCIDeviceGetVPD;
> virPCIDeviceHasPCIExpressLink;
> virPCIDeviceIsAssignable;
>diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
>index 95d1c2ee98..9b08f66175 100644
>--- a/src/qemu/qemu_command.c
>+++ b/src/qemu/qemu_command.c
>@@ -4756,6 +4756,12 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
>     const char *iommufdId = NULL;
>     /* 'ramfb' property must be omitted unless it's to be enabled */
>     bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
>+    bool useIommufd = false;
>+
>+    if (pcisrc->driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
>+        pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES) {
>+        useIommufd = true;

No need for this separate bool, just move the two conditions where the
bool is used.

>+    }
>
>     /* caller has to assign proper passthrough driver name */
>     switch (pcisrc->driver.name) {
>@@ -4802,6 +4808,17 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
>                               NULL) < 0)
>         return NULL;
>
>+    if (useIommufd && dev->privateData) {

privateData should be allocated unconditionally for all hostdevs, once
that is done, it does not need to be checked here.

>+        qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(dev);
>+
>+        if (hostdevPriv->vfioDeviceFd >= 0) {

For 'fd's, a != -1 comparison is preferred (any other negative value is
a programming error)

>+            if (virJSONValueObjectAdd(&props,
>+                                      "S:fd", g_strdup_printf("%d", hostdevPriv->vfioDeviceFd),

g_strdup_printf returns an allocated string, but virJSONValueObjectAdd
does not "take ownership" of it.

Try:
g_autofree char *fdstr = g_strdup(..);
at the start of the 'if' block and use 'fdstr' instead'

>+                                      NULL) < 0)
>+                return NULL;
>+        }
>+    }
>+
>     if (qemuBuildDeviceAddressProps(props, def, dev->info) < 0)
>         return NULL;
>
>@@ -5260,6 +5277,15 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
>             if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
>                 return -1;
>
>+            if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
>+                qemuDomainHostdevPrivate *hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
>+
>+                if (hostdevPriv && hostdevPriv->vfioDeviceFd >= 0) {

No need to check for hostdevPriv if we allocate it for all hostdevs.

>+                    virCommandPassFD(cmd, hostdevPriv->vfioDeviceFd,
>+                                     VIR_COMMAND_PASS_FD_CLOSE_PARENT);
>+                }
>+            }
>+
>             if (!(devprops = qemuBuildPCIHostdevDevProps(def, hostdev)))
>                 return -1;
>
>diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>index ac56fc7cb4..7601bdbb2b 100644
>--- a/src/qemu/qemu_domain.c
>+++ b/src/qemu/qemu_domain.c
>@@ -1238,6 +1238,45 @@ qemuDomainNetworkPrivateFormat(const virDomainNetDef *net,
> }
>
>
>+static virClass *qemuDomainHostdevPrivateClass;
>+
>+static void
>+qemuDomainHostdevPrivateDispose(void *obj)
>+{
>+    qemuDomainHostdevPrivate *priv = obj;
>+
>+    VIR_FORCE_CLOSE(priv->vfioDeviceFd);
>+}
>+
>+
>+static int
>+qemuDomainHostdevPrivateOnceInit(void)
>+{
>+    if (!VIR_CLASS_NEW(qemuDomainHostdevPrivate, virClassForObject()))
>+        return -1;
>+
>+    return 0;
>+}
>+
>+VIR_ONCE_GLOBAL_INIT(qemuDomainHostdevPrivate);
>+
>+virObject *
>+qemuDomainHostdevPrivateNew(void)

This functions should also be added to
virQEMUDriverPrivateDataCallbacks as .hostdevNew,
then it can be called from the parser:

virDomainHostdevDefNew(virDomainXMLOption *xmlopt)
{
   ...
     if (xmlopt && xmlopt->privateData.hostdevNew &&
         !(def->privateData = xmlopt->privateData.hostdevNew())) {
         VIR_FREE(def->info);
         VIR_FREE(def);
         return NULL;
     }
   ...
}

(Yes, the callers would need to be adjusted too)

>+{
>+    qemuDomainHostdevPrivate *priv;
>+
>+    if (qemuDomainHostdevPrivateInitialize() < 0)
...
>diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>index 45fc32a663..bf245ee8af 100644
>--- a/src/qemu/qemu_process.c
>+++ b/src/qemu/qemu_process.c
>@@ -106,6 +106,7 @@
>
> #include "logging/log_manager.h"
> #include "logging/log_protocol.h"
>+#include "util/virpci.h"

No need for the util prefix. Also, can be grouped in the previous group.

>
> #define VIR_FROM_THIS VIR_FROM_QEMU
>
>@@ -8091,6 +8092,9 @@ qemuProcessLaunch(virConnectPtr conn,
>     if (qemuExtDevicesStart(driver, vm, incomingMigrationExtDevices) < 0)
>         goto cleanup;
>
>+    if (qemuProcessOpenVfioFds(vm) < 0)
>+        goto cleanup;
>+
>     if (!(cmd = qemuBuildCommandLine(vm,
>                                      incoming ? "defer" : NULL,
>                                      vmop,
>@@ -10267,3 +10271,129 @@ qemuProcessHandleNbdkitExit(qemuNbdkitProcess *nbdkit,
>     qemuProcessEventSubmit(vm, QEMU_PROCESS_EVENT_NBDKIT_EXITED, 0, 0, nbdkit);
>     virObjectUnlock(vm);
> }
>+
>+/**
>+ * qemuProcessOpenVfioDeviceFd:
>+ * @hostdev: host device definition
>+ * @vfioFd: returned file descriptor
>+ *
>+ * Opens the VFIO device file descriptor for a hostdev.
>+ *
>+ * Returns: 0 on success, -1 on failure

This can return the fd on success directly, no need to return it via
pointer.

>+ */
>+static int
>+qemuProcessOpenVfioDeviceFd(virDomainHostdevDef *hostdev,
>+                            int *vfioFd)
>+{
>+    g_autofree char *vfioPath = NULL;
>+    int fd = -1;
>+
>+
>+    if (hostdev->mode != VIR_DOMAIN_HOSTDEV_MODE_SUBSYS ||
>+        hostdev->source.subsys.type != VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
>+        virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
>+                       _("VFIO FD only supported for PCI hostdevs"));
>+        return -1;
>+    }
>+
>+    if (virPCIDeviceGetVfioPath(&hostdev->source.subsys.u.pci.addr, &vfioPath) < 0)
>+        return -1;
>+
>+    VIR_DEBUG("Opening VFIO device %s", vfioPath);
>+
>+    if ((fd = open(vfioPath, O_RDWR | O_CLOEXEC)) < 0) {
>+        if (errno == ENOENT) {
>+            virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
>+                           _("VFIO device %1$s not found - ensure device is bound to vfio-pci driver"),
>+                           vfioPath);
>+        } else {
>+            virReportSystemError(errno,
>+                                 _("cannot open VFIO device %1$s"), vfioPath);
>+        }
>+        return -1;
>+    }
>+
>+    *vfioFd = fd;
>+    VIR_DEBUG("Opened VFIO device FD %d for %s", *vfioFd, vfioPath);
>+    return 0;
>+}
>+
>+/**
>+ * qemuProcessOpenVfioFds:
>+ * @vm: domain object
>+ *
>+ * Opens all necessary VFIO file descriptors for the domain.
>+ *
>+ * Returns: 0 on success, -1 on failure
>+ */
>+int
>+qemuProcessOpenVfioFds(virDomainObj *vm)
>+{
>+    size_t i;
>+
>+    /* Check if we have any hostdevs that need VFIO FDs */
>+    for (i = 0; i < vm->def->nhostdevs; i++) {
>+        virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
>+        qemuDomainHostdevPrivate *hostdevPriv = NULL;
>+

>+        if (hostdev->mode == VIR_DOMAIN_HOSTDEV_MODE_SUBSYS &&
>+            hostdev->source.subsys.type == VIR_DOMAIN_HOSTDEV_SUBSYS_TYPE_PCI) {
>+
>+            if (hostdev->source.subsys.u.pci.driver.name == VIR_DEVICE_HOSTDEV_PCI_DRIVER_NAME_VFIO &&
>+                hostdev->source.subsys.u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES) {
>+

These two conditions above can be joint into one to save one level of
indentation

>+                if (!hostdev->privateData) {
>+                    if (!(hostdev->privateData = qemuDomainHostdevPrivateNew()))
>+                        goto error;
>+                }
>+

This should be allocated in virDomainHostdevDefNew.

>+                hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
>+
>+                /* Open VFIO device FD */
>+                if (qemuProcessOpenVfioDeviceFd(hostdev, &hostdevPriv->vfioDeviceFd) < 0)
>+                    goto error;
>+

>+                VIR_DEBUG("Stored VFIO FD %d in hostdev %04x:%02x:%02x.%d private data",
>+                         hostdevPriv->vfioDeviceFd,
>+                         hostdev->source.subsys.u.pci.addr.domain,
>+                         hostdev->source.subsys.u.pci.addr.bus,
>+                         hostdev->source.subsys.u.pci.addr.slot,
>+                         hostdev->source.subsys.u.pci.addr.function);

No need for this debug log since the function above just logged the same
info

>+            }
>+        }
>+    }
>+
>+    return 0;
>+
>+ error:
>+    qemuProcessCloseVfioFds(vm);

No need to close the file descriptors here, they will be closed when the
private data gets destroyed along with the rest of the live domain
definition (i.e. the copy libvirt makes on domain startup)

>+    return -1;
>+}
>+
>+/**
>+ * qemuProcessCloseVfioFds:
>+ * @vm: domain object
>+ *
>+ * Closes all VFIO file descriptors for the domain.
>+ */
>+void
>+qemuProcessCloseVfioFds(virDomainObj *vm)

This function is not necessary

>+{
>+    size_t i;
>+
>+    /* Close all VFIO device FDs */
>+    for (i = 0; i < vm->def->nhostdevs; i++) {
>+        virDomainHostdevDef *hostdev = vm->def->hostdevs[i];
>+        qemuDomainHostdevPrivate *hostdevPriv;
>+
>+        if (!hostdev->privateData)
>+            continue;
>+
>+        hostdevPriv = QEMU_DOMAIN_HOSTDEV_PRIVATE(hostdev);
>+
>+        if (hostdevPriv->vfioDeviceFd >= 0) {
>+            VIR_DEBUG("Closing VFIO device FD %d", hostdevPriv->vfioDeviceFd);
>+            VIR_FORCE_CLOSE(hostdevPriv->vfioDeviceFd);
>+        }
>+    }
>+}

Jano