[PATCH v2 12/12] iommu/amd: Introduce IOMMUFD vIOMMU support for AMD

Suravee Suthikulpanit posted 12 patches 4 months, 1 week ago
There is a newer version of this series
[PATCH v2 12/12] iommu/amd: Introduce IOMMUFD vIOMMU support for AMD
Posted by Suravee Suthikulpanit 4 months, 1 week ago
Introduce struct amd_iommu_vminfo to store AMD HW-vIOMMU per-IOMMU data,
which is initialized when calling struct iommu_ops.viommu_init().

Currently, the struct amd_iommu_vminfo and amd_iommu_viommu_init() contain
base code to support nested domain allocation for vIOMMU using the struct
iommufd_viommu_ops.alloc_domain_nested.

Additional initialization will be added in subsequent patches.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd/amd_iommu.h       | 10 +++++
 drivers/iommu/amd/amd_iommu_types.h |  6 +++
 drivers/iommu/amd/iommu.c           | 61 ++++++++++++++++++++++++++++-
 drivers/iommu/amd/iommufd.c         |  8 ++++
 drivers/iommu/amd/iommufd.h         |  2 +
 include/uapi/linux/iommufd.h        | 19 +++++++++
 6 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 924152973d11..6cb929b657e4 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -169,6 +169,16 @@ static inline struct amd_iommu *get_amd_iommu_from_dev_data(struct iommu_dev_dat
 	return iommu_get_iommu_dev(dev_data->dev, struct amd_iommu, iommu);
 }
 
+static inline struct amd_iommu *get_amd_iommu_from_devid(u16 devid)
+{
+	struct amd_iommu *iommu;
+
+	for_each_iommu(iommu)
+		if (iommu->devid == devid)
+			return iommu;
+	return NULL;
+}
+
 static inline struct protection_domain *to_pdomain(struct iommu_domain *dom)
 {
 	return container_of(dom, struct protection_domain, domain);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 9bc2e0e18978..dcecb5df2f72 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -17,6 +17,7 @@
 #include <linux/list.h>
 #include <linux/spinlock.h>
 #include <linux/pci.h>
+#include <linux/iommufd.h>
 #include <linux/irqreturn.h>
 #include <linux/io-pgtable.h>
 
@@ -1117,6 +1118,11 @@ struct amd_irte_ops {
 	void (*clear_allocated)(struct irq_remap_table *, int);
 };
 
+struct amd_iommu_vminfo {
+	struct iommufd_viommu core;
+	u32 iommu_devid;
+};
+
 #ifdef CONFIG_IRQ_REMAP
 extern struct amd_irte_ops irte_32_ops;
 extern struct amd_irte_ops irte_128_ops;
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index c1abb06126c1..e3503091cd65 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -3063,6 +3063,61 @@ static const struct iommu_dirty_ops amd_dirty_ops = {
 	.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
 };
 
+static size_t amd_iommu_get_viommu_size(struct device *dev, enum iommu_viommu_type viommu_type)
+{
+	if (viommu_type != IOMMU_VIOMMU_TYPE_AMD)
+		return 0;
+
+	return VIOMMU_STRUCT_SIZE(struct amd_iommu_vminfo, core);
+}
+
+/*
+ * This is called from the drivers/iommu/iommufd/viommu.c: iommufd_viommu_alloc_ioctl
+ */
+static int amd_iommu_viommu_init(struct iommufd_viommu *viommu,
+				 struct iommu_domain *parent,
+				 const struct iommu_user_data *user_data)
+{
+#if IS_ENABLED(CONFIG_AMD_IOMMU_IOMMUFD)
+	int ret;
+	struct amd_iommu *iommu;
+	struct iommu_viommu_amd data;
+	struct amd_iommu_vminfo *vminfo = container_of(viommu, struct amd_iommu_vminfo, core);
+
+	if (!user_data)
+		return -EINVAL;
+
+	ret = iommu_copy_struct_from_user(&data, user_data,
+					  IOMMU_VIOMMU_TYPE_AMD,
+					  reserved);
+	if (ret)
+		return ret;
+
+	iommu = get_amd_iommu_from_devid(data.iommu_devid);
+	if (!iommu)
+		return -ENODEV;
+
+	vminfo->iommu_devid = data.iommu_devid;
+
+	/* TODO: Add AMD HW-vIOMMU initialization code */
+
+	ret = iommu_copy_struct_to_user(user_data, &data,
+					IOMMU_VIOMMU_TYPE_AMD,
+					reserved);
+	if (ret)
+		goto err_out;
+
+	viommu->ops = &amd_viommu_ops;
+
+	return 0;
+
+err_out:
+	return ret;
+#else
+	return -EOPNOTSUPP;
+#endif /* CONFIG_AMD_IOMMU_IOMMUFD */
+}
+
 const struct iommu_ops amd_iommu_ops = {
 	.capable = amd_iommu_capable,
 	.hw_info = amd_iommufd_hw_info,
@@ -3088,7 +3143,11 @@ const struct iommu_ops amd_iommu_ops = {
 		.iotlb_sync	= amd_iommu_iotlb_sync,
 		.free		= amd_iommu_domain_free,
 		.enforce_cache_coherency = amd_iommu_enforce_cache_coherency,
-	}
+	},
+
+	/* For VIOMMU */
+	.get_viommu_size = amd_iommu_get_viommu_size,
+	.viommu_init = amd_iommu_viommu_init,
 };
 
 #ifdef CONFIG_IRQ_REMAP
diff --git a/drivers/iommu/amd/iommufd.c b/drivers/iommu/amd/iommufd.c
index 72eaaa923d04..d81e4ad17d9d 100644
--- a/drivers/iommu/amd/iommufd.c
+++ b/drivers/iommu/amd/iommufd.c
@@ -29,3 +29,11 @@ void *amd_iommufd_hw_info(struct device *dev, u32 *length, u32 *type)
 
 	return hwinfo;
 }
+
+/*
+ * See include/linux/iommufd.h
+ * struct iommufd_viommu_ops - vIOMMU specific operations
+ */
+const struct iommufd_viommu_ops amd_viommu_ops = {
+	.alloc_domain_nested = amd_iommu_alloc_domain_nested,
+};
diff --git a/drivers/iommu/amd/iommufd.h b/drivers/iommu/amd/iommufd.h
index f880be80a30d..0d59ef160780 100644
--- a/drivers/iommu/amd/iommufd.h
+++ b/drivers/iommu/amd/iommufd.h
@@ -7,6 +7,8 @@
 #define AMD_IOMMUFD_H
 
 #if IS_ENABLED(CONFIG_AMD_IOMMU_IOMMUFD)
+extern const struct iommufd_viommu_ops amd_viommu_ops;
+
 void *amd_iommufd_hw_info(struct device *dev, u32 *length, u32 *type);
 #else
 #define amd_iommufd_hw_info NULL
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 79d4ba43cd5f..e7084dbc5c95 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -1038,11 +1038,13 @@ struct iommu_fault_alloc {
  * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
  * @IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV: NVIDIA Tegra241 CMDQV (extension for ARM
  *                                    SMMUv3) enabled ARM SMMUv3 type
+ * @IOMMU_VIOMMU_TYPE_AMD: AMD HW-vIOMMU type
  */
 enum iommu_viommu_type {
 	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
 	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
 	IOMMU_VIOMMU_TYPE_TEGRA241_CMDQV = 2,
+	IOMMU_VIOMMU_TYPE_AMD = 3,
 };
 
 /**
@@ -1061,6 +1063,23 @@ struct iommu_viommu_tegra241_cmdqv {
 	__aligned_u64 out_vintf_mmap_length;
 };
 
+/**
+ * struct iommu_viommu_amd - AMD vIOMMU Interface (IOMMU_VIOMMU_TYPE_AMD)
+ * @iommu_devid: Host IOMMU PCI device ID
+ * @viommu_devid: Guest vIOMMU PCI device ID
+ * @trans_devid: GPA->GVA translation device ID (host)
+ * @out_gid: (out) Guest ID
+ * @out_vfmmio_mmap_offset: (out) mmap offset for vIOMMU VF-MMIO
+ */
+struct iommu_viommu_amd {
+	__u32 iommu_devid;
+	__u32 viommu_devid;
+	__u32 trans_devid;
+	__u32 out_gid;
+	__aligned_u64 out_vfmmio_mmap_offset;
+	__u32 reserved; /* must be last */
+};
+
 /**
  * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
  * @size: sizeof(struct iommu_viommu_alloc)
-- 
2.34.1
Re: [PATCH v2 12/12] iommu/amd: Introduce IOMMUFD vIOMMU support for AMD
Posted by Nicolin Chen 4 months, 1 week ago
On Wed, Oct 01, 2025 at 06:09:54AM +0000, Suravee Suthikulpanit wrote:
> Introduce struct amd_iommu_vminfo to store AMD HW-vIOMMU per-IOMMU data,
> which is initialized when calling struct iommu_ops.viommu_init().
> 
> Currently, the struct amd_iommu_vminfo and amd_iommu_viommu_init() contain
> base code to support nested domain allocation for vIOMMU using the struct
> iommufd_viommu_ops.alloc_domain_nested.
> 
> Additional initialization will be added in subsequent patches.

This is the last patch in the series. You mean some future patch?

And pls briefly elaborate what is the additional initialization for.

> diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
> index c1abb06126c1..e3503091cd65 100644
> --- a/drivers/iommu/amd/iommu.c
> +++ b/drivers/iommu/amd/iommu.c
> @@ -3063,6 +3063,61 @@ static const struct iommu_dirty_ops amd_dirty_ops = {
>  	.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
>  };
>  
> +static size_t amd_iommu_get_viommu_size(struct device *dev, enum iommu_viommu_type viommu_type)
> +{
> +	if (viommu_type != IOMMU_VIOMMU_TYPE_AMD)
> +		return 0;
> +
> +	return VIOMMU_STRUCT_SIZE(struct amd_iommu_vminfo, core);
> +}
> +
> +/*
> + * This is called from the drivers/iommu/iommufd/viommu.c: iommufd_viommu_alloc_ioctl
> + */
> +static int amd_iommu_viommu_init(struct iommufd_viommu *viommu,
> +				 struct iommu_domain *parent,
> +				 const struct iommu_user_data *user_data)
> +{
> +#if IS_ENABLED(CONFIG_AMD_IOMMU_IOMMUFD)

These two should be put in the iommufd.c file, and the header
should define the followings (next to amd_iommufd_hw_info):

 #if IS_ENABLED(CONFIG_AMD_IOMMU_IOMMUFD)
 void *amd_iommufd_hw_info(struct device *dev, u32 *length, u32 *type);
+size_t amd_iommu_get_viommu_size(struct device *dev,
+				 enum iommu_viommu_type viommu_type);
+int amd_iommu_viommu_init(struct iommufd_viommu *viommu,
+			  struct iommu_domain *parent,
+			  const struct iommu_user_data *user_data);
 #else
 #define amd_iommufd_hw_info NULL
+#define amd_iommu_get_viommu_size NULL
+#define amd_iommu_viommu_init NULL
 #endif

The core would return -EOPNOTSUPP if either of them is NULL.

> +	if (ret)
> +		return ret;
> +
> +	iommu = get_amd_iommu_from_devid(data.iommu_devid);
> +	if (!iommu)
> +		return -ENODEV;
> +
> +	vminfo->iommu_devid = data.iommu_devid;

If you want the struct amd_iommu pointer, do this instead:

	iommu = container_of(viommu->iommu_dev, struct amd_iommu, iommu);

Then iommu_devid and get_amd_iommu_from_devid() aren't used anywhere
else. So both could be dropped.

> +/*
> + * See include/linux/iommufd.h
> + * struct iommufd_viommu_ops - vIOMMU specific operations
> + */
> +const struct iommufd_viommu_ops amd_viommu_ops = {
> +	.alloc_domain_nested = amd_iommu_alloc_domain_nested,
> +};

Unfortunately, a viommu_ops with alloc_domain_nested is incomplete,
IMHO. If this series gets merged alone, it declares that the kernel
now supports AMD IOMMU's virtualization, which actually won't work
without a cache invalidation op (SW) or hw_queue (HW-acceleration).

> diff --git a/drivers/iommu/amd/iommufd.h b/drivers/iommu/amd/iommufd.h
> index f880be80a30d..0d59ef160780 100644
> --- a/drivers/iommu/amd/iommufd.h
> +++ b/drivers/iommu/amd/iommufd.h
> @@ -7,6 +7,8 @@
>  #define AMD_IOMMUFD_H
>  
>  #if IS_ENABLED(CONFIG_AMD_IOMMU_IOMMUFD)
> +extern const struct iommufd_viommu_ops amd_viommu_ops;

You don't need this if defining the amd_iommu_get_viommu_size and
amd_iommu_viommu_init here.

> +/**
> + * struct iommu_viommu_amd - AMD vIOMMU Interface (IOMMU_VIOMMU_TYPE_AMD)
> + * @iommu_devid: Host IOMMU PCI device ID

Though I don't think you need this, just curious, how does user
space know the host PCI device ID?

> + * @viommu_devid: Guest vIOMMU PCI device ID

This isn't used in the patch, so I am not sure. But it sounds like
a vDEVICE virt_id to me. Though it probably works for the AMD case
because AMD IOMMU is per PCI device (IIRC), I think it'd be better
to define a vDEVICE forwarding this ID. I don't know, but for CC,
AMD might end up with a vDEVICE object anyway?

> + * @trans_devid: GPA->GVA translation device ID (host)

This is unclear to me either, and it's not being used, so I cannot
tell what it is for. But it feels like a part of vDEVICE object..

> + * @out_gid: (out) Guest ID

Again, not being used, needs some elaboration.

> + * @out_vfmmio_mmap_offset: (out) mmap offset for vIOMMU VF-MMIO

"(out)" is redundant.

@out_vfmmio_mmap_offset: mmap offset argument for vIOMMU VF-MMIO

And you could have an @out_vfmmio_mmap_length too, even if it is
just for validation in the iommufd core.

> + */
> +struct iommu_viommu_amd {
> +	__u32 iommu_devid;
> +	__u32 viommu_devid;
> +	__u32 trans_devid;
> +	__u32 out_gid;
> +	__aligned_u64 out_vfmmio_mmap_offset;
> +	__u32 reserved; /* must be last */

"reserved" is useless here.

Usually we add reserved field for paddings, and we usually call
it "__reserved".

struct iommu_viommu_amd is perfectly 64-bit aligned without the
last "reserved" field. I would drop it.

Nicolin
Re: [PATCH v2 12/12] iommu/amd: Introduce IOMMUFD vIOMMU support for AMD
Posted by Jason Gunthorpe 4 months ago
On Thu, Oct 02, 2025 at 01:05:32PM -0700, Nicolin Chen wrote:
> > +/*
> > + * See include/linux/iommufd.h
> > + * struct iommufd_viommu_ops - vIOMMU specific operations
> > + */
> > +const struct iommufd_viommu_ops amd_viommu_ops = {
> > +	.alloc_domain_nested = amd_iommu_alloc_domain_nested,
> > +};
> 
> Unfortunately, a viommu_ops with alloc_domain_nested is incomplete,
> IMHO. If this series gets merged alone, it declares that the kernel
> now supports AMD IOMMU's virtualization, which actually won't work
> without a cache invalidation op (SW) or hw_queue (HW-acceleration).

Yeah, I'd move this patch to a series implementing viommu fully.

I'm pretty sure this series is OK as is without it, but it would be
good to at least share a sketch of the viommu support to be sure
before merging it.

Jason