[PATCH v2 1/5] qemu: Implement support for associating iommufd to hostdev

Nathan Chen via Devel posted 5 patches 2 weeks, 6 days ago
[PATCH v2 1/5] qemu: Implement support for associating iommufd to hostdev
Posted by Nathan Chen via Devel 2 weeks, 6 days ago
Implement a new iommufd attribute under hostdevs' PCI
subsystem driver that can be used to specify associated
iommufd object when launching a qemu VM.

Signed-off-by: Nathan Chen <nathanc@nvidia.com>
---
 docs/formatdomain.rst           |  8 ++++++++
 src/conf/device_conf.c          | 12 ++++++++++++
 src/conf/device_conf.h          |  1 +
 src/conf/schemas/basictypes.rng |  5 +++++
 src/qemu/qemu_command.c         | 19 +++++++++++++++++++
 5 files changed, 45 insertions(+)

diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index 160e7ad9c7..dcb24b1b23 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -4862,6 +4862,7 @@ or:
    device; if PCI ROM loading is disabled through this attribute, attempts to
    tweak the loading process further using the ``bar`` or ``file`` attributes
    will be rejected. :since:`Since 4.3.0 (QEMU and KVM only)`.
+
 ``address``
    The ``address`` element for USB devices has a ``bus`` and ``device``
    attribute to specify the USB bus and device number the device appears at on
@@ -4902,6 +4903,13 @@ or:
    found is "problematic" in some way, the generic vfio-pci driver
    similarly be forced.
 
+   The ``<driver>`` element's ``iommufd`` attribute is used to specify
+   using the iommufd interface to propagate DMA mappings to the kernel,
+   instead of VFIO alone. When the attribute is present, an iommufd
+   object will be created by the resulting qemu command. Libvirt will
+   open the /dev/iommu and VFIO device cdev, passing the associated
+   file descriptor numbers to the qemu command.
+
    (Note: :since:`Since 1.0.5`, the ``name`` attribute has been
    described to be used to select the type of PCI device assignment
    ("vfio", "kvm", or "xen"), but those values have been mostly
diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
index c278b81652..7682236d65 100644
--- a/src/conf/device_conf.c
+++ b/src/conf/device_conf.c
@@ -60,6 +60,8 @@ int
 virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
                                       virDeviceHostdevPCIDriverInfo *driver)
 {
+    virTristateBool iommufd;
+    driver->iommufd = false;
     if (virXMLPropEnum(node, "name",
                        virDeviceHostdevPCIDriverNameTypeFromString,
                        VIR_XML_PROP_NONZERO,
@@ -67,6 +69,10 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
         return -1;
     }
 
+    if (virXMLPropTristateBool(node, "iommufd", VIR_XML_PROP_NONE, &iommufd) < 0)
+        return -1;
+    driver->iommufd = iommufd;
+
     driver->model = virXMLPropString(node, "model");
     return 0;
 }
@@ -93,6 +99,12 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf,
 
     virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model);
 
+    if (driver->iommufd == VIR_TRISTATE_BOOL_YES) {
+        virBufferAddLit(&driverAttrBuf, " iommufd='yes'");
+    } else if (driver->iommufd == VIR_TRISTATE_BOOL_NO) {
+        virBufferAddLit(&driverAttrBuf, " iommufd='no'");
+    }
+
     virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL);
     return 0;
 }
diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h
index e570f51824..116b959143 100644
--- a/src/conf/device_conf.h
+++ b/src/conf/device_conf.h
@@ -47,6 +47,7 @@ VIR_ENUM_DECL(virDeviceHostdevPCIDriverName);
 struct _virDeviceHostdevPCIDriverInfo {
     virDeviceHostdevPCIDriverName name;
     char *model;
+    virTristateBool iommufd;
 };
 
 typedef enum {
diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng
index 2931e316b7..089fc0f1c2 100644
--- a/src/conf/schemas/basictypes.rng
+++ b/src/conf/schemas/basictypes.rng
@@ -673,6 +673,11 @@
           <ref name="genericName"/>
         </attribute>
       </optional>
+      <optional>
+        <attribute name="iommufd">
+          <ref name="virYesNo"/>
+        </attribute>
+      </optional>
       <empty/>
     </element>
   </define>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 5a834ef842..95d1c2ee98 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4753,6 +4753,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
     g_autofree char *host = virPCIDeviceAddressAsString(&pcisrc->addr);
     const char *failover_pair_id = NULL;
     const char *driver = NULL;
+    const char *iommufdId = NULL;
     /* 'ramfb' property must be omitted unless it's to be enabled */
     bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
 
@@ -4786,6 +4787,9 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
         teaming->persistent)
         failover_pair_id = teaming->persistent;
 
+    if (pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES)
+        iommufdId = "iommufd0";
+
     if (virJSONValueObjectAdd(&props,
                               "s:driver", driver,
                               "s:host", host,
@@ -4794,6 +4798,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
                               "S:failover_pair_id", failover_pair_id,
                               "S:display", qemuOnOffAuto(pcisrc->display),
                               "B:ramfb", ramfb,
+                              "S:iommufd", iommufdId,
                               NULL) < 0)
         return NULL;
 
@@ -5210,6 +5215,9 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
                             virQEMUCaps *qemuCaps)
 {
     size_t i;
+    g_autoptr(virJSONValue) props = NULL;
+    int iommufd = 0;
+    const char * iommufdId = "iommufd0";
 
     for (i = 0; i < def->nhostdevs; i++) {
         virDomainHostdevDef *hostdev = def->hostdevs[i];
@@ -5238,6 +5246,17 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
            if (hostdev->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED)
                continue;
 
+            if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES && iommufd == 0) {
+                iommufd = 1;
+                if (qemuMonitorCreateObjectProps(&props, "iommufd",
+                                                 iommufdId,
+                                                 NULL) < 0)
+                    return -1;
+
+                if (qemuBuildObjectCommandlineFromJSON(cmd, props) < 0)
+                    return -1;
+            }
+
             if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
                 return -1;
 
-- 
2.43.0
Re: [PATCH v2 1/5] qemu: Implement support for associating iommufd to hostdev
Posted by Ján Tomko via Devel 3 days, 18 hours ago
On a Friday in 2025, Nathan Chen via Devel wrote:

To get rid of the "via Devel" part in git commit's Author: field,
set the format.from  and format.forceInBodyFrom fields in your
gitconfig:

https://libvirt.org/submitting-patches.html#git-configuration

>Implement a new iommufd attribute under hostdevs' PCI
>subsystem driver that can be used to specify associated
>iommufd object when launching a qemu VM.
>

Also, I've applied most of the stuff I say here in my branch,
mostly to see if they are possible:
https://gitlab.com/janotomko/libvirt/-/tree/iommufd?ref_type=heads
git fetch https://gitlab.com/janotomko/libvirt.git iommufd

>Signed-off-by: Nathan Chen <nathanc@nvidia.com>
>---
> docs/formatdomain.rst           |  8 ++++++++
> src/conf/device_conf.c          | 12 ++++++++++++
> src/conf/device_conf.h          |  1 +
> src/conf/schemas/basictypes.rng |  5 +++++
> src/qemu/qemu_command.c         | 19 +++++++++++++++++++
> 5 files changed, 45 insertions(+)
>
>diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
>index 160e7ad9c7..dcb24b1b23 100644
>--- a/docs/formatdomain.rst
>+++ b/docs/formatdomain.rst
>@@ -4862,6 +4862,7 @@ or:
>    device; if PCI ROM loading is disabled through this attribute, attempts to
>    tweak the loading process further using the ``bar`` or ``file`` attributes
>    will be rejected. :since:`Since 4.3.0 (QEMU and KVM only)`.
>+

Unrelated and unnecessary whitespace change (lots of others in the file
do not put a space between attributes)

> ``address``
>    The ``address`` element for USB devices has a ``bus`` and ``device``
>    attribute to specify the USB bus and device number the device appears at on
>@@ -4902,6 +4903,13 @@ or:
>    found is "problematic" in some way, the generic vfio-pci driver
>    similarly be forced.
>
>+   The ``<driver>`` element's ``iommufd`` attribute is used to specify
>+   using the iommufd interface to propagate DMA mappings to the kernel,
>+   instead of VFIO alone. When the attribute is present, an iommufd
>+   object will be created by the resulting qemu command. Libvirt will
>+   open the /dev/iommu and VFIO device cdev, passing the associated
>+   file descriptor numbers to the qemu command.
>+
>    (Note: :since:`Since 1.0.5`, the ``name`` attribute has been
>    described to be used to select the type of PCI device assignment
>    ("vfio", "kvm", or "xen"), but those values have been mostly
>diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
>index c278b81652..7682236d65 100644
>--- a/src/conf/device_conf.c
>+++ b/src/conf/device_conf.c
>@@ -60,6 +60,8 @@ int
> virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
>                                       virDeviceHostdevPCIDriverInfo *driver)
> {
>+    virTristateBool iommufd;

This intermediary variable is not needed

>+    driver->iommufd = false;

and this is not a bool variable.

>     if (virXMLPropEnum(node, "name",
>                        virDeviceHostdevPCIDriverNameTypeFromString,
>                        VIR_XML_PROP_NONZERO,
>@@ -67,6 +69,10 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
>         return -1;
>     }
>
>+    if (virXMLPropTristateBool(node, "iommufd", VIR_XML_PROP_NONE, &iommufd) < 0)
>+        return -1;

Just let virXMLPropTristateBool handle the default and write into
driver->iommufd directly.

>+    driver->iommufd = iommufd;
>+
>     driver->model = virXMLPropString(node, "model");
>     return 0;
> }
>@@ -93,6 +99,12 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf,
>
>     virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model);
>
>+    if (driver->iommufd == VIR_TRISTATE_BOOL_YES) {
>+        virBufferAddLit(&driverAttrBuf, " iommufd='yes'");
>+    } else if (driver->iommufd == VIR_TRISTATE_BOOL_NO) {
>+        virBufferAddLit(&driverAttrBuf, " iommufd='no'");
>+    }
>+
>     virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL);
>     return 0;
> }
>diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h
>index e570f51824..116b959143 100644
>--- a/src/conf/device_conf.h
>+++ b/src/conf/device_conf.h
>@@ -47,6 +47,7 @@ VIR_ENUM_DECL(virDeviceHostdevPCIDriverName);
> struct _virDeviceHostdevPCIDriverInfo {
>     virDeviceHostdevPCIDriverName name;
>     char *model;
>+    virTristateBool iommufd;
> };
>
> typedef enum {
>diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng
>index 2931e316b7..089fc0f1c2 100644
>--- a/src/conf/schemas/basictypes.rng
>+++ b/src/conf/schemas/basictypes.rng
>@@ -673,6 +673,11 @@
>           <ref name="genericName"/>
>         </attribute>
>       </optional>
>+      <optional>
>+        <attribute name="iommufd">
>+          <ref name="virYesNo"/>
>+        </attribute>
>+      </optional>
>       <empty/>
>     </element>
>   </define>
>diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
>index 5a834ef842..95d1c2ee98 100644
>--- a/src/qemu/qemu_command.c
>+++ b/src/qemu/qemu_command.c
>@@ -4753,6 +4753,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
>     g_autofree char *host = virPCIDeviceAddressAsString(&pcisrc->addr);
>     const char *failover_pair_id = NULL;
>     const char *driver = NULL;
>+    const char *iommufdId = NULL;
>     /* 'ramfb' property must be omitted unless it's to be enabled */
>     bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
>
>@@ -4786,6 +4787,9 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
>         teaming->persistent)
>         failover_pair_id = teaming->persistent;
>
>+    if (pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES)
>+        iommufdId = "iommufd0";
>+
>     if (virJSONValueObjectAdd(&props,
>                               "s:driver", driver,
>                               "s:host", host,
>@@ -4794,6 +4798,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
>                               "S:failover_pair_id", failover_pair_id,
>                               "S:display", qemuOnOffAuto(pcisrc->display),
>                               "B:ramfb", ramfb,
>+                              "S:iommufd", iommufdId,
>                               NULL) < 0)
>         return NULL;
>
>@@ -5210,6 +5215,9 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
>                             virQEMUCaps *qemuCaps)
> {
>     size_t i;
>+    g_autoptr(virJSONValue) props = NULL;
>+    int iommufd = 0;
>+    const char * iommufdId = "iommufd0";
>
>     for (i = 0; i < def->nhostdevs; i++) {
>         virDomainHostdevDef *hostdev = def->hostdevs[i];
>@@ -5238,6 +5246,17 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
>            if (hostdev->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED)
>                continue;
>
>+            if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES && iommufd == 0) {
>+                iommufd = 1;
>+                if (qemuMonitorCreateObjectProps(&props, "iommufd",
>+                                                 iommufdId,
>+                                                 NULL) < 0)
>+                    return -1;

Formatting the iommufd object should be in a separate function, e.g.
qemuBuildIOMMUFDCommandLine, not here in qemuBuildHostdevCommandLine.

Moving it out also fixes the memory leak caused by reusal of 'props'
without freeing them.

Jano

>+
>+                if (qemuBuildObjectCommandlineFromJSON(cmd, props) < 0)
>+                    return -1;
>+            }
>+
>             if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
>                 return -1;
>
>-- 
>2.43.0
>