Implement a new iommufd attribute under hostdevs' PCI
subsystem driver that can be used to specify an associated
iommufd object when launching a qemu VM.
Signed-off-by: Nathan Chen <nathanc@nvidia.com>
---
docs/formatdomain.rst | 8 ++++++++
src/conf/device_conf.c | 9 +++++++++
src/conf/device_conf.h | 1 +
src/conf/schemas/basictypes.rng | 5 +++++
src/qemu/qemu_command.c | 19 +++++++++++++++++++
5 files changed, 42 insertions(+)
diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index 34dc9c3af7..a5c69dbcf4 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -4845,6 +4845,7 @@ or:
device; if PCI ROM loading is disabled through this attribute, attempts to
tweak the loading process further using the ``bar`` or ``file`` attributes
will be rejected. :since:`Since 4.3.0 (QEMU and KVM only)`.
+
``address``
The ``address`` element for USB devices has a ``bus`` and ``device``
attribute to specify the USB bus and device number the device appears at on
@@ -4885,6 +4886,13 @@ or:
found is "problematic" in some way, the generic vfio-pci driver
similarly be forced.
+ The ``<driver>`` element's ``iommufd`` attribute selects the iommufd
+ interface, instead of legacy VFIO, for propagating DMA mappings to
+ the kernel. When the attribute is present, an iommufd object will be
+ created by the resulting qemu command. Libvirt will open /dev/iommu
+ and the VFIO device cdev, passing the associated file descriptor
+ numbers to the qemu command.
+
(Note: :since:`Since 1.0.5`, the ``name`` attribute has been
described to be used to select the type of PCI device assignment
("vfio", "kvm", or "xen"), but those values have been mostly
diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
index c278b81652..88979ecc39 100644
--- a/src/conf/device_conf.c
+++ b/src/conf/device_conf.c
@@ -60,6 +60,8 @@ int
virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
virDeviceHostdevPCIDriverInfo *driver)
{
+ virTristateBool iommufd;
+ driver->iommufd = false;
if (virXMLPropEnum(node, "name",
virDeviceHostdevPCIDriverNameTypeFromString,
VIR_XML_PROP_NONZERO,
@@ -67,6 +69,10 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
return -1;
}
+ if (virXMLPropTristateBool(node, "iommufd", VIR_XML_PROP_NONE, &iommufd) < 0)
+ return -1;
+ virTristateBoolToBool(iommufd, &driver->iommufd);
+
driver->model = virXMLPropString(node, "model");
return 0;
}
@@ -93,6 +99,9 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf,
virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model);
+ if (driver->iommufd)
+ virBufferAddLit(&driverAttrBuf, " iommufd='yes'");
+
virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL);
return 0;
}
diff --git a/src/conf/device_conf.h b/src/conf/device_conf.h
index e570f51824..7bdbd80b0a 100644
--- a/src/conf/device_conf.h
+++ b/src/conf/device_conf.h
@@ -47,6 +47,7 @@ VIR_ENUM_DECL(virDeviceHostdevPCIDriverName);
struct _virDeviceHostdevPCIDriverInfo {
virDeviceHostdevPCIDriverName name;
char *model;
+ bool iommufd;
};
typedef enum {
diff --git a/src/conf/schemas/basictypes.rng b/src/conf/schemas/basictypes.rng
index 2931e316b7..089fc0f1c2 100644
--- a/src/conf/schemas/basictypes.rng
+++ b/src/conf/schemas/basictypes.rng
@@ -673,6 +673,11 @@
<ref name="genericName"/>
</attribute>
</optional>
+ <optional>
+ <attribute name="iommufd">
+ <ref name="virYesNo"/>
+ </attribute>
+ </optional>
<empty/>
</element>
</define>
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index c538a9fb2f..8fd7527645 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4738,6 +4738,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
g_autofree char *host = virPCIDeviceAddressAsString(&pcisrc->addr);
const char *failover_pair_id = NULL;
const char *driver = NULL;
+ const char *iommufdId = NULL;
/* 'ramfb' property must be omitted unless it's to be enabled */
bool ramfb = pcisrc->ramfb == VIR_TRISTATE_SWITCH_ON;
@@ -4771,6 +4772,9 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
teaming->persistent)
failover_pair_id = teaming->persistent;
+ if (pcisrc->driver.iommufd)
+ iommufdId = "iommufd0";
+
if (virJSONValueObjectAdd(&props,
"s:driver", driver,
"s:host", host,
@@ -4779,6 +4783,7 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
"S:failover_pair_id", failover_pair_id,
"S:display", qemuOnOffAuto(pcisrc->display),
"B:ramfb", ramfb,
+ "S:iommufd", iommufdId,
NULL) < 0)
return NULL;
@@ -5195,6 +5200,9 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
virQEMUCaps *qemuCaps)
{
size_t i;
+ g_autoptr(virJSONValue) props = NULL;
+ int iommufd = 0;
+ const char * iommufdId = "iommufd0";
for (i = 0; i < def->nhostdevs; i++) {
virDomainHostdevDef *hostdev = def->hostdevs[i];
@@ -5223,6 +5231,17 @@ qemuBuildHostdevCommandLine(virCommand *cmd,
if (hostdev->info->type == VIR_DOMAIN_DEVICE_ADDRESS_TYPE_UNASSIGNED)
continue;
+ if (subsys->u.pci.driver.iommufd && iommufd == 0) {
+ iommufd = 1;
+ if (qemuMonitorCreateObjectProps(&props, "iommufd",
+ iommufdId,
+ NULL) < 0)
+ return -1;
+
+ if (qemuBuildObjectCommandlineFromJSON(cmd, props) < 0)
+ return -1;
+ }
+
if (qemuCommandAddExtDevice(cmd, hostdev->info, def, qemuCaps) < 0)
return -1;
--
2.43.0
[cc-ing Laine and Andrea if they have a better memory of the time we
went from "legacy" passthrough to vfio]
On a Monday in 2025, Nathan Chen via Devel wrote:
>Implement a new iommufd attribute under hostdevs' PCI
>subsystem driver that can be used to specify associated
>iommufd object when launching a qemu VM.
This does not specify which iommufd object it is, only that the
default one should be used.
It's perfect for now; we might need a different element if using
anything other than iommufd0 starts making sense.
Also, I think it should be fine not to expose the object in the XML since
it has configurable attributes now:
# qemu-system-x86_64 -object iommufd,?
iommufd options:
fd=<string>
>
>Signed-off-by: Nathan Chen <nathanc@nvidia.com>
>---
> docs/formatdomain.rst | 8 ++++++++
> src/conf/device_conf.c | 9 +++++++++
> src/conf/device_conf.h | 1 +
> src/conf/schemas/basictypes.rng | 5 +++++
> src/qemu/qemu_command.c | 19 +++++++++++++++++++
> 5 files changed, 42 insertions(+)
>
>diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
>index 34dc9c3af7..a5c69dbcf4 100644
>--- a/docs/formatdomain.rst
>+++ b/docs/formatdomain.rst
>@@ -4845,6 +4845,7 @@ or:
> device; if PCI ROM loading is disabled through this attribute, attempts to
> tweak the loading process further using the ``bar`` or ``file`` attributes
> will be rejected. :since:`Since 4.3.0 (QEMU and KVM only)`.
>+
> ``address``
> The ``address`` element for USB devices has a ``bus`` and ``device``
> attribute to specify the USB bus and device number the device appears at on
>@@ -4885,6 +4886,13 @@ or:
> found is "problematic" in some way, the generic vfio-pci driver
> similarly be forced.
>
>+ The ``<driver>`` element's ``iommufd`` attribute is used to specify
>+ using the iommufd interface to propagate DMA mappings to the kernel,
>+ instead of legacy VFIO. When the attribute is present, an iommufd
>+ object will be created by the resulting qemu command. Libvirt will
>+ open the /dev/iommu and VFIO device cdev, passing the associated
>+ file descriptor numbers to the qemu command.
>+
Should we resurrect the old attribute and use:
<driver name="iommufd"/>
The idea is that later on, when it no longer makes sense
to use "legacy" VFIO, we will retire it again.
Also, referring to it as "legacy" is both premature (since iommufd does not
have feature parity yet) and confusing with the passage of time.
> (Note: :since:`Since 1.0.5`, the ``name`` attribute has been
> described to be used to select the type of PCI device assignment
> ("vfio", "kvm", or "xen"), but those values have been mostly
>diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
>index c278b81652..88979ecc39 100644
>--- a/src/conf/device_conf.c
>+++ b/src/conf/device_conf.c
>@@ -60,6 +60,8 @@ int
> virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
> virDeviceHostdevPCIDriverInfo *driver)
> {
>+ virTristateBool iommufd;
>+ driver->iommufd = false;
> if (virXMLPropEnum(node, "name",
> virDeviceHostdevPCIDriverNameTypeFromString,
> VIR_XML_PROP_NONZERO,
>@@ -67,6 +69,10 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
> return -1;
> }
>
>+ if (virXMLPropTristateBool(node, "iommufd", VIR_XML_PROP_NONE, &iommufd) < 0)
>+ return -1;
>+ virTristateBoolToBool(iommufd, &driver->iommufd);
Storing this as 'bool' loses information. We need to be able to tell
whether iommufd was not used because the user did not specify it or
whether it was not used because the user explicitly said no, for future
compatibility reasons.
Jano
>+
> driver->model = virXMLPropString(node, "model");
> return 0;
> }
>@@ -93,6 +99,9 @@ virDeviceHostdevPCIDriverInfoFormat(virBuffer *buf,
>
> virBufferEscapeString(&driverAttrBuf, " model='%s'", driver->model);
>
>+ if (driver->iommufd)
>+ virBufferAddLit(&driverAttrBuf, " iommufd='yes'");
>+
> virXMLFormatElement(buf, "driver", &driverAttrBuf, NULL);
> return 0;
> }
On 11/6/2025 10:49 AM, Ján Tomko wrote:
>> Implement a new iommufd attribute under hostdevs' PCI
>> subsystem driver that can be used to specify associated
>> iommufd object when launching a qemu VM.
>
> This does not specify which iommufd object it is, just to use the
> default one.
>
> It's perfect for now, we might need a different element if using
> anything else than iommufd0 starts making sense.
>
> Also, I think it should fine not to expose the object in the XML since
> it has configurable attributes now:
>
> # qemu-system-x86_64 -object iommufd,?
> iommufd options:
> fd=<string>
>
Noted; I will revisit this if anything other than iommufd0 makes sense.
>>
>> Signed-off-by: Nathan Chen <nathanc@nvidia.com>
>> ---
>> docs/formatdomain.rst | 8 ++++++++
>> src/conf/device_conf.c | 9 +++++++++
>> src/conf/device_conf.h | 1 +
>> src/conf/schemas/basictypes.rng | 5 +++++
>> src/qemu/qemu_command.c | 19 +++++++++++++++++++
>> 5 files changed, 42 insertions(+)
>>
>> diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
>> index 34dc9c3af7..a5c69dbcf4 100644
>> --- a/docs/formatdomain.rst
>> +++ b/docs/formatdomain.rst
>> @@ -4845,6 +4845,7 @@ or:
>> device; if PCI ROM loading is disabled through this attribute,
>> attempts to
>> tweak the loading process further using the ``bar`` or ``file``
>> attributes
>> will be rejected. :since:`Since 4.3.0 (QEMU and KVM only)`.
>> +
>> ``address``
>> The ``address`` element for USB devices has a ``bus`` and ``device``
>> attribute to specify the USB bus and device number the device
>> appears at on
>> @@ -4885,6 +4886,13 @@ or:
>> found is "problematic" in some way, the generic vfio-pci driver
>> similarly be forced.
>>
>> + The ``<driver>`` element's ``iommufd`` attribute is used to specify
>> + using the iommufd interface to propagate DMA mappings to the kernel,
>> + instead of legacy VFIO. When the attribute is present, an iommufd
>> + object will be created by the resulting qemu command. Libvirt will
>> + open the /dev/iommu and VFIO device cdev, passing the associated
>> + file descriptor numbers to the qemu command.
>> +
>
> Should we resurrect the old attribute and use:
> <driver name="iommufd"/>
>
> The idea being that later in time, when it will no longer make sense
> to use "legacy" VFIO, we will retire it again.
>
> Also, referring to it as "legacy" is both premature (since iommufd does not
> have the feature parity yet) and confusing in the passage of time.
>
I think it would be better to leave it as-is for now, since there are
variant VFIO drivers besides vfio-pci that could be assigned to the
driver name attribute in tandem with enabling iommufd.
>> (Note: :since:`Since 1.0.5`, the ``name`` attribute has been
>> described to be used to select the type of PCI device assignment
>> ("vfio", "kvm", or "xen"), but those values have been mostly
>> diff --git a/src/conf/device_conf.c b/src/conf/device_conf.c
>> index c278b81652..88979ecc39 100644
>> --- a/src/conf/device_conf.c
>> +++ b/src/conf/device_conf.c
>> @@ -60,6 +60,8 @@ int
>> virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
>> virDeviceHostdevPCIDriverInfo
>> *driver)
>> {
>> + virTristateBool iommufd;
>> + driver->iommufd = false;
>> if (virXMLPropEnum(node, "name",
>> virDeviceHostdevPCIDriverNameTypeFromString,
>> VIR_XML_PROP_NONZERO,
>> @@ -67,6 +69,10 @@ virDeviceHostdevPCIDriverInfoParseXML(xmlNodePtr node,
>> return -1;
>> }
>>
>> + if (virXMLPropTristateBool(node, "iommufd", VIR_XML_PROP_NONE,
>> &iommufd) < 0)
>> + return -1;
>> + virTristateBoolToBool(iommufd, &driver->iommufd);
>
> Storing this as 'bool' is losing information. We need to be able to tell
> whether iommufd was not used because the user did not specify it or
> whether it was not used because the user explicitly said no for future
> compatibility reasons.
That makes sense; I will update it to use virTristateBool instead in the
next revision.
-Nathan
© 2016 - 2025 Red Hat, Inc.