Implement a new iommufd attribute under hostdevs' PCI
subsystem driver that can be used to specify associated
iommufd object when launching a qemu VM.
Signed-off-by: Nathan Chen <nathanc@nvidia.com>
---
docs/formatdomain.rst | 8 ++++++++
src/conf/device_conf.c | 12 ++++++++++++
src/conf/device_conf.h | 1 +
src/conf/schemas/basictypes.rng | 5 +++++
src/qemu/qemu_command.c | 19 +++++++++++++++++++
5 files changed, 45 insertions(+)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index 160e7ad9c7..dcb24b1b23 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -4862,6 +4862,7 @@ or:
device; if PCI ROM loading is disabled through this attribute, attempts to
tweak the loading process further using the ``bar`` or ``file`` attributes
will be rejected. :since:`Since 4.3.0 (QEMU and KVM only)`.
+
``address``
The ``address`` element for USB devices has a ``bus`` and ``device``
attribute to specify the USB bus and device number the device appears at on
@@ -4902,6 +4903,13 @@ or:
found is "problematic" in some way, the generic vfio-pci driver
similarly be forced.
+ The ``<driver>`` element's ``iommufd`` attribute is used to specify
+ using the iommufd interface to propagate DMA mappings to the kernel,
+ instead of VFIO alone. When the attribute is present, an iommufd
+ object will be created by the resulting qemu command. Libvirt will
+ open the /dev/iommu and VFIO device cdev, passing the associated
+ file descriptor numbers to the qemu command.
+
(Note: :since:`Since 1.0.5`, the ``name`` attribute has been
described to be used to select the type of PCI device assignment
("vfio", "kvm", or "xen"), but those values have been mostly
diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
index c278b81652..7682236d65 100644
--- a/src/conf/device_conf.c
+++ b/src/conf/device_conf.c
@@ -60,6 +60,8 @@ int
virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
virDeviceHostdevPCIDriverInfo *driver)
{
+ virTristateBool iommufd;
+ driver->iommufd = false;
if (virXMLPropEnum(node, "name",
virDeviceHostdevPCIDriverNameTypeFromString,
VIR_XML_PROP_NONZERO,
@@ -67,6 +69,10 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
return -1;
}
+ if (virXMLPropTristateBool(node, "iommufd", VIR_XML_PROP_NONE, &iommufd) < 0)
+ return -1;
+ driver->iommufd = iommufd;
+
driver->model = virXMLPropString(node, "model");
return 0;
}
@@ -93,6 +99,12 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf,
virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model);
+ if (driver->iommufd == VIR_TRISTATE_BOOL_YES) {
+ virBufferAddLit(&driverAttrBuf, " iommufd='yes'");
+ } else if (driver->iommufd == VIR_TRISTATE_BOOL_NO) {
+ virBufferAddLit(&driverAttrBuf, " iommufd='no'");
+ }
+
virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL);
return 0;
}
diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h
index e570f51824..116b959143 100644
--- a/src/conf/device_conf.h
+++ b/src/conf/device_conf.h
@@ -47,6 +47,7 @@ VIR_ENUM_DECL(virDeviceHostdevPCIDriverName);
struct _virDeviceHostdevPCIDriverInfo {
virDeviceHostdevPCIDriverName name;
char *model;
+ virTristateBool iommufd;
};
typedef enum {
diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng
index 2931e316b7..089fc0f1c2 100644
--- a/src/conf/schemas/basictypes.rng
+++ b/src/conf/schemas/basictypes.rng
@@ -673,6 +673,11 @@
<ref name="genericName"/>
</attribute>
</optional>
+ <optional>
+ <attribute name="iommufd">
+ <ref name="virYesNo"/>
+ </attribute>
+ </optional>
<empty/>
</element>
</define>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 5a834ef842..95d1c2ee98 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4753,6 +4753,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
g_autofree char *host = virPCIDeviceAddressAsString(&pcisrc->addr);
const char *failover_pair_id = NULL;
const char *driver = NULL;
+ const char *iommufdId = NULL;
/* 'ramfb' property must be omitted unless it's to be enabled */
bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
@@ -4786,6 +4787,9 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
teaming->persistent)
failover_pair_id = teaming->persistent;
+ if (pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES)
+ iommufdId = "iommufd0";
+
if (virJSONValueObjectAdd(&props,
"s:driver", driver,
"s:host", host,
@@ -4794,6 +4798,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
"S:failover_pair_id", failover_pair_id,
"S:display", qemuOnOffAuto(pcisrc->display),
"B:ramfb", ramfb,
+ "S:iommufd", iommufdId,
NULL) < 0)
return NULL;
@@ -5210,6 +5215,9 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
virQEMUCaps *qemuCaps)
{
size_t i;
+ g_autoptr(virJSONValue) props = NULL;
+ int iommufd = 0;
+ const char * iommufdId = "iommufd0";
for (i = 0; i < def->nhostdevs; i++) {
virDomainHostdevDef *hostdev = def->hostdevs[i];
@@ -5238,6 +5246,17 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
if (hostdev->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED)
continue;
+ if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES && iommufd == 0) {
+ iommufd = 1;
+ if (qemuMonitorCreateObjectProps(&props, "iommufd",
+ iommufdId,
+ NULL) < 0)
+ return -1;
+
+ if (qemuBuildObjectCommandlineFromJSON(cmd, props) < 0)
+ return -1;
+ }
+
if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
return -1;
--
2.43.0
On a Friday in 2025, Nathan Chen via Devel wrote:
To get rid of the "via Devel" part in git commit's Author: field,
set the format.from and format.forceInBodyFrom fields in your
gitconfig:
https://libvirt.org/submitting-patches.html#git-configuration
>Implement a new iommufd attribute under hostdevs' PCI
>subsystem driver that can be used to specify associated
>iommufd object when launching a qemu VM.
>
Also, I've applied most of the stuff I say here in my branch,
mostly to see if they are possible:
https://gitlab.com/janotomko/libvirt/-/tree/iommufd?ref_type=heads
git fetch https://gitlab.com/janotomko/libvirt.git iommufd
>Signed-off-by: Nathan Chen <nathanc@nvidia.com>
>---
> docs/formatdomain.rst | 8 ++++++++
> src/conf/device_conf.c | 12 ++++++++++++
> src/conf/device_conf.h | 1 +
> src/conf/schemas/basictypes.rng | 5 +++++
> src/qemu/qemu_command.c | 19 +++++++++++++++++++
> 5 files changed, 45 insertions(+)
>
>diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
>index 160e7ad9c7..dcb24b1b23 100644
>--- a/docs/formatdomain.rst
>+++ b/docs/formatdomain.rst
>@@ -4862,6 +4862,7 @@ or:
> device; if PCI ROM loading is disabled through this attribute, attempts to
> tweak the loading process further using the ``bar`` or ``file`` attributes
> will be rejected. :since:`Since 4.3.0 (QEMU and KVM only)`.
>+
Unrelated and unnecessary whitespace change (lots of others in the file
do not put a space between attributes)
> ``address``
> The ``address`` element for USB devices has a ``bus`` and ``device``
> attribute to specify the USB bus and device number the device appears at on
>@@ -4902,6 +4903,13 @@ or:
> found is "problematic" in some way, the generic vfio-pci driver
> similarly be forced.
>
>+ The ``<driver>`` element's ``iommufd`` attribute is used to specify
>+ using the iommufd interface to propagate DMA mappings to the kernel,
>+ instead of VFIO alone. When the attribute is present, an iommufd
>+ object will be created by the resulting qemu command. Libvirt will
>+ open the /dev/iommu and VFIO device cdev, passing the associated
>+ file descriptor numbers to the qemu command.
>+
> (Note: :since:`Since 1.0.5`, the ``name`` attribute has been
> described to be used to select the type of PCI device assignment
> ("vfio", "kvm", or "xen"), but those values have been mostly
>diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
>index c278b81652..7682236d65 100644
>--- a/src/conf/device_conf.c
>+++ b/src/conf/device_conf.c
>@@ -60,6 +60,8 @@ int
> virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
> virDeviceHostdevPCIDriverInfo *driver)
> {
>+ virTristateBool iommufd;
This intermediary variable is not needed
>+ driver->iommufd = false;
and this is not a bool variable.
> if (virXMLPropEnum(node, "name",
> virDeviceHostdevPCIDriverNameTypeFromString,
> VIR_XML_PROP_NONZERO,
>@@ -67,6 +69,10 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
> return -1;
> }
>
>+ if (virXMLPropTristateBool(node, "iommufd", VIR_XML_PROP_NONE, &iommufd) < 0)
>+ return -1;
Just let virXMLPropTristateBool handle the default and write into
driver->iommufd directly.
>+ driver->iommufd = iommufd;
>+
> driver->model = virXMLPropString(node, "model");
> return 0;
> }
>@@ -93,6 +99,12 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf,
>
> virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model);
>
>+ if (driver->iommufd == VIR_TRISTATE_BOOL_YES) {
>+ virBufferAddLit(&driverAttrBuf, " iommufd='yes'");
>+ } else if (driver->iommufd == VIR_TRISTATE_BOOL_NO) {
>+ virBufferAddLit(&driverAttrBuf, " iommufd='no'");
>+ }
>+
> virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL);
> return 0;
> }
>diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h
>index e570f51824..116b959143 100644
>--- a/src/conf/device_conf.h
>+++ b/src/conf/device_conf.h
>@@ -47,6 +47,7 @@ VIR_ENUM_DECL(virDeviceHostdevPCIDriverName);
> struct _virDeviceHostdevPCIDriverInfo {
> virDeviceHostdevPCIDriverName name;
> char *model;
>+ virTristateBool iommufd;
> };
>
> typedef enum {
>diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng
>index 2931e316b7..089fc0f1c2 100644
>--- a/src/conf/schemas/basictypes.rng
>+++ b/src/conf/schemas/basictypes.rng
>@@ -673,6 +673,11 @@
> <ref name="genericName"/>
> </attribute>
> </optional>
>+ <optional>
>+ <attribute name="iommufd">
>+ <ref name="virYesNo"/>
>+ </attribute>
>+ </optional>
> <empty/>
> </element>
> </define>
>diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
>index 5a834ef842..95d1c2ee98 100644
>--- a/src/qemu/qemu_command.c
>+++ b/src/qemu/qemu_command.c
>@@ -4753,6 +4753,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
> g_autofree char *host = virPCIDeviceAddressAsString(&pcisrc->addr);
> const char *failover_pair_id = NULL;
> const char *driver = NULL;
>+ const char *iommufdId = NULL;
> /* 'ramfb' property must be omitted unless it's to be enabled */
> bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
>
>@@ -4786,6 +4787,9 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
> teaming->persistent)
> failover_pair_id = teaming->persistent;
>
>+ if (pcisrc->driver.iommufd == VIR_TRISTATE_BOOL_YES)
>+ iommufdId = "iommufd0";
>+
> if (virJSONValueObjectAdd(&props,
> "s:driver", driver,
> "s:host", host,
>@@ -4794,6 +4798,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
> "S:failover_pair_id", failover_pair_id,
> "S:display", qemuOnOffAuto(pcisrc->display),
> "B:ramfb", ramfb,
>+ "S:iommufd", iommufdId,
> NULL) < 0)
> return NULL;
>
>@@ -5210,6 +5215,9 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
> virQEMUCaps *qemuCaps)
> {
> size_t i;
>+ g_autoptr(virJSONValue) props = NULL;
>+ int iommufd = 0;
>+ const char * iommufdId = "iommufd0";
>
> for (i = 0; i < def->nhostdevs; i++) {
> virDomainHostdevDef *hostdev = def->hostdevs[i];
>@@ -5238,6 +5246,17 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
> if (hostdev->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED)
> continue;
>
>+ if (subsys->u.pci.driver.iommufd == VIR_TRISTATE_BOOL_YES && iommufd == 0) {
>+ iommufd = 1;
>+ if (qemuMonitorCreateObjectProps(&props, "iommufd",
>+ iommufdId,
>+ NULL) < 0)
>+ return -1;
Formatting the iommufd object should be in a separate function, e.g.
qemuBuildIOMMUFDCommandLine, not here in qemuBuildHostdevCommandLine.
Moving it out also fixes the memory leak caused by reusal of 'props'
without freeing them.
Jano
>+
>+ if (qemuBuildObjectCommandlineFromJSON(cmd, props) < 0)
>+ return -1;
>+ }
>+
> if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
> return -1;
>
>--
>2.43.0
>
© 2016 - 2025 Red Hat, Inc.