Implement "iommufdId" and "iommufdFd" attributes for "hostdev" devices that
can be used to specify associated iommufd object and fd for externally opening
the /dev/iommu and VFIO cdev, when launching a qemu VM.
Signed-off-by: Nathan Chen <nathanc@nvidia.com>
---
src/conf/domain_conf.c | 31 +++++++++++++++++++++++++++++++
src/conf/domain_conf.h | 2 ++
src/conf/domain_validate.c | 14 ++++++++++++++
src/conf/schemas/domaincommon.rng | 17 +++++++++++++++++
src/qemu/qemu_command.c | 2 ++
5 files changed, 66 insertions(+)
diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 9330c47b7a..47b7e9acb1 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -13582,6 +13582,19 @@ virDomainVideoDefParseXML(virDomainXMLOption *xmlopt,
return g_steal_pointer(&def);
}
+static void
+virDomainHostdevDefIommufdParseXML(xmlXPathContextPtr ctxt,
+ char** iommufdId,
+ char** iommufdFd)
+{
+ g_autofree char *iommufdIdtmp = virXPathString("string(./iommufdId)", ctxt);
+ g_autofree char *iommufdFdtmp = virXPathString("string(./iommufdFd)", ctxt);
+ if (iommufdIdtmp)
+ *iommufdId = g_steal_pointer(&iommufdIdtmp);
+ if (iommufdFdtmp)
+ *iommufdFd = g_steal_pointer(&iommufdFdtmp);
+}
+
static virDomainHostdevDef *
virDomainHostdevDefParseXML(virDomainXMLOption *xmlopt,
xmlNodePtr node,
@@ -13656,6 +13669,8 @@ virDomainHostdevDefParseXML(virDomainXMLOption *xmlopt,
if (virDomainNetTeamingInfoParseXML(ctxt, &def->teaming) < 0)
goto error;
+ virDomainHostdevDefIommufdParseXML(ctxt, &def->iommufdId, &def->iommufdFd);
+
return def;
error:
@@ -21193,6 +21208,16 @@ virDomainHostdevDefCheckABIStability(virDomainHostdevDef *src,
}
}
+ if (src->iommufdId && dst->iommufdId) {
+ if (STRNEQ(src->iommufdId, dst->iommufdId))
+ return false;
+ }
+
+ if (src->iommufdFd && dst->iommufdFd) {
+ if (STRNEQ(src->iommufdFd, dst->iommufdFd))
+ return false;
+ }
+
if (!virDomainDeviceInfoCheckABIStability(src->info, dst->info))
return false;
@@ -27511,6 +27536,12 @@ virDomainHostdevDefFormat(virBuffer *buf,
if (def->shareable)
virBufferAddLit(buf, "<shareable/>\n");
+ if (def->iommufdId) {
+ virBufferAsprintf(buf, "<iommufdId>%s</iommufdId>\n", def->iommufdId);
+ if (def->iommufdFd)
+ virBufferAsprintf(buf, "<iommufdFd>%s</iommufdFd>\n", def->iommufdFd);
+ }
+
virDomainDeviceInfoFormat(buf, def->info, flags | VIR_DOMAIN_DEF_FORMAT_ALLOW_BOOT
| VIR_DOMAIN_DEF_FORMAT_ALLOW_ROM);
diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
index ce447e9823..78507d78b7 100644
--- a/src/conf/domain_conf.h
+++ b/src/conf/domain_conf.h
@@ -375,6 +375,8 @@ struct _virDomainHostdevDef {
virDomainHostdevCaps caps;
} source;
virDomainNetTeamingInfo *teaming;
+ char *iommufdId;
+ char *iommufdFd;
virDomainDeviceInfo *info; /* Guest address */
};
diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
index 88649ba0b9..292398b115 100644
--- a/src/conf/domain_validate.c
+++ b/src/conf/domain_validate.c
@@ -1401,6 +1401,20 @@ virDomainDefHostdevValidate(const virDomainDef *def)
ramfbEnabled = true;
}
}
+
+ /* Check for unsupported conditions:
+ * - A hostdev's iommufd fd number is defined but its iommufd id
+ * number is not
+ * - An iommu device is defined, iommufd is not defined,
+ * but a hostdev's iommufd id or fd are defined */
+ if ((!dev->iommufdId && dev->iommufdFd) ||
+ (def->iommu && def->niommus > 0 &&
+ !def->iommu[0]->iommufd &&
+ (dev->iommufdId || dev->iommufdFd))) {
+ virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
+ _("Unsupported hostdev iommufd configuration"));
+ return -1;
+ }
}
return 0;
diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
index 41db338c71..6c1f901eb4 100644
--- a/src/conf/schemas/domaincommon.rng
+++ b/src/conf/schemas/domaincommon.rng
@@ -6442,6 +6442,12 @@
<optional>
<ref name="address"/>
</optional>
+ <optional>
+ <ref name="iommufdId"/>
+ </optional>
+ <optional>
+ <ref name="iommufdFd"/>
+ </optional>
<optional>
<element name="readonly">
<empty/>
@@ -7809,6 +7815,17 @@
</element>
</define>
+ <define name="iommufdId">
+ <element name="iommufdId">
+ <text/>
+ </element>
+ </define>
+ <define name="iommufdFd">
+ <element name="iommufdFd">
+ <text/>
+ </element>
+ </define>
+
<define name="deviceBoot">
<element name="boot">
<attribute name="order">
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index d683d0eef7..3a8b71a00a 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -4724,6 +4724,8 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
"S:failover_pair_id", failover_pair_id,
"S:display", qemuOnOffAuto(pcisrc->display),
"B:ramfb", ramfb,
+ "S:iommufd", dev->iommufdId,
+ "S:fd", dev->iommufdFd,
NULL) < 0)
return NULL;
--
2.43.0
On Thu, May 15, 2025 at 01:36:41PM -0700, Nathan Chen via Devel wrote:
> Implement "iommufdId" and "iommufdFd" attributes for "hostdev" devices that
> can be used to specify associated iommufd object and fd for externally opening
> the /dev/iommu and VFIO cdev, when launching a qemu VM.
What does it mean if we enable iommufd on the 'hostdev' without enabling
iommufd on the 'iommu' device, or vica-verca ?
IMHO it seems like we should be enabling iommufd once only.
>
> Signed-off-by: Nathan Chen <nathanc@nvidia.com>
> ---
> src/conf/domain_conf.c | 31 +++++++++++++++++++++++++++++++
> src/conf/domain_conf.h | 2 ++
> src/conf/domain_validate.c | 14 ++++++++++++++
> src/conf/schemas/domaincommon.rng | 17 +++++++++++++++++
> src/qemu/qemu_command.c | 2 ++
> 5 files changed, 66 insertions(+)
>
> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> index 9330c47b7a..47b7e9acb1 100644
> --- a/src/conf/domain_conf.c
> +++ b/src/conf/domain_conf.c
> @@ -13582,6 +13582,19 @@ virDomainVideoDefParseXML(virDomainXMLOption *xmlopt,
> return g_steal_pointer(&def);
> }
>
> +static void
> +virDomainHostdevDefIommufdParseXML(xmlXPathContextPtr ctxt,
> + char** iommufdId,
> + char** iommufdFd)
> +{
> + g_autofree char *iommufdIdtmp = virXPathString("string(./iommufdId)", ctxt);
> + g_autofree char *iommufdFdtmp = virXPathString("string(./iommufdFd)", ctxt);
> + if (iommufdIdtmp)
> + *iommufdId = g_steal_pointer(&iommufdIdtmp);
> + if (iommufdFdtmp)
> + *iommufdFd = g_steal_pointer(&iommufdFdtmp);
> +}
> +
> static virDomainHostdevDef *
> virDomainHostdevDefParseXML(virDomainXMLOption *xmlopt,
> xmlNodePtr node,
> @@ -13656,6 +13669,8 @@ virDomainHostdevDefParseXML(virDomainXMLOption *xmlopt,
> if (virDomainNetTeamingInfoParseXML(ctxt, &def->teaming) < 0)
> goto error;
>
> + virDomainHostdevDefIommufdParseXML(ctxt, &def->iommufdId, &def->iommufdFd);
> +
> return def;
>
> error:
> @@ -21193,6 +21208,16 @@ virDomainHostdevDefCheckABIStability(virDomainHostdevDef *src,
> }
> }
>
> + if (src->iommufdId && dst->iommufdId) {
> + if (STRNEQ(src->iommufdId, dst->iommufdId))
> + return false;
> + }
> +
> + if (src->iommufdFd && dst->iommufdFd) {
> + if (STRNEQ(src->iommufdFd, dst->iommufdFd))
> + return false;
> + }
> +
> if (!virDomainDeviceInfoCheckABIStability(src->info, dst->info))
> return false;
>
> @@ -27511,6 +27536,12 @@ virDomainHostdevDefFormat(virBuffer *buf,
> if (def->shareable)
> virBufferAddLit(buf, "<shareable/>\n");
>
> + if (def->iommufdId) {
> + virBufferAsprintf(buf, "<iommufdId>%s</iommufdId>\n", def->iommufdId);
> + if (def->iommufdFd)
> + virBufferAsprintf(buf, "<iommufdFd>%s</iommufdFd>\n", def->iommufdFd);
> + }
> +
> virDomainDeviceInfoFormat(buf, def->info, flags | VIR_DOMAIN_DEF_FORMAT_ALLOW_BOOT
> | VIR_DOMAIN_DEF_FORMAT_ALLOW_ROM);
>
> diff --git a/src/conf/domain_conf.h b/src/conf/domain_conf.h
> index ce447e9823..78507d78b7 100644
> --- a/src/conf/domain_conf.h
> +++ b/src/conf/domain_conf.h
> @@ -375,6 +375,8 @@ struct _virDomainHostdevDef {
> virDomainHostdevCaps caps;
> } source;
> virDomainNetTeamingInfo *teaming;
> + char *iommufdId;
> + char *iommufdFd;
> virDomainDeviceInfo *info; /* Guest address */
> };
>
> diff --git a/src/conf/domain_validate.c b/src/conf/domain_validate.c
> index 88649ba0b9..292398b115 100644
> --- a/src/conf/domain_validate.c
> +++ b/src/conf/domain_validate.c
> @@ -1401,6 +1401,20 @@ virDomainDefHostdevValidate(const virDomainDef *def)
> ramfbEnabled = true;
> }
> }
> +
> + /* Check for unsupported conditions:
> + * - A hostdev's iommufd fd number is defined but its iommufd id
> + * number is not
> + * - An iommu device is defined, iommufd is not defined,
> + * but a hostdev's iommufd id or fd are defined */
> + if ((!dev->iommufdId && dev->iommufdFd) ||
> + (def->iommu && def->niommus > 0 &&
> + !def->iommu[0]->iommufd &&
> + (dev->iommufdId || dev->iommufdFd))) {
> + virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
> + _("Unsupported hostdev iommufd configuration"));
> + return -1;
> + }
> }
>
> return 0;
> diff --git a/src/conf/schemas/domaincommon.rng b/src/conf/schemas/domaincommon.rng
> index 41db338c71..6c1f901eb4 100644
> --- a/src/conf/schemas/domaincommon.rng
> +++ b/src/conf/schemas/domaincommon.rng
> @@ -6442,6 +6442,12 @@
> <optional>
> <ref name="address"/>
> </optional>
> + <optional>
> + <ref name="iommufdId"/>
> + </optional>
> + <optional>
> + <ref name="iommufdFd"/>
> + </optional>
> <optional>
> <element name="readonly">
> <empty/>
> @@ -7809,6 +7815,17 @@
> </element>
> </define>
>
> + <define name="iommufdId">
> + <element name="iommufdId">
> + <text/>
> + </element>
> + </define>
> + <define name="iommufdFd">
> + <element name="iommufdFd">
> + <text/>
> + </element>
> + </define>
> +
> <define name="deviceBoot">
> <element name="boot">
> <attribute name="order">
> diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
> index d683d0eef7..3a8b71a00a 100644
> --- a/src/qemu/qemu_command.c
> +++ b/src/qemu/qemu_command.c
> @@ -4724,6 +4724,8 @@ qemuBuildPCIHostdevDevProps(const virDomainDef *def,
> "S:failover_pair_id", failover_pair_id,
> "S:display", qemuOnOffAuto(pcisrc->display),
> "B:ramfb", ramfb,
> + "S:iommufd", dev->iommufdId,
> + "S:fd", dev->iommufdFd,
> NULL) < 0)
> return NULL;
>
> --
> 2.43.0
>
With regards,
Daniel
--
|: https://berrange.com -o- https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o- https://fstop138.berrange.com :|
|: https://entangle-photo.org -o- https://www.instagram.com/dberrange :|
On 5/20/2025 6:05 AM, Daniel P. Berrangé wrote: >> Implement "iommufdId" and "iommufdFd" attributes for "hostdev" devices that >> can be used to specify associated iommufd object and fd for externally opening >> the /dev/iommu and VFIO cdev, when launching a qemu VM. > What does it mean if we enable iommufd on the 'hostdev' without enabling > iommufd on the 'iommu' device, or vica-verca ? > There are checks in this series that prevent enabling iommufd on the hostdev without enabling iommufd on the iommu device as it would not make sense to link a hostdev with a non-existent iommufd object. If enabling iommufd on the iommu device and not enabling iommufd on a hostdev, qemu will fall back to legacy VFIO behavior for the hostdev and the iommufd object will not be used for passthrough device mappings. > IMHO it seems like we should be enabling iommufd once only. I can see why we should be enabling iommufd once only for the initial iommufd object, but we still need to provide an option to select legacy VFIO or iommufd for each hostdev definition. And if we would like to enable iommufd once only for the initial iommufd object, would it make more sense to move the iommufd attribute outside of the <iommu> stanza to account for the case where we have multiple <iommu> definitions? Thanks, Nathan
© 2016 - 2025 Red Hat, Inc.