The nested domain is allocated with IOMMU_DOMAIN_NESTED type to store
stage-1 translation (i.e. GVA->GPA). This includes the GCR3 root pointer
table along with guest page tables. The struct iommu_hwpt_amd_guest
contains this information, and is passed from user-space as a parameter
of the struct iommu_ops.domain_alloc_nested().
Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
drivers/iommu/amd/Makefile | 2 +-
drivers/iommu/amd/amd_iommu.h | 4 ++
drivers/iommu/amd/amd_iommu_types.h | 31 +++++++----
drivers/iommu/amd/nested.c | 84 +++++++++++++++++++++++++++++
4 files changed, 110 insertions(+), 11 deletions(-)
create mode 100644 drivers/iommu/amd/nested.c
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 5ae46d99a45b..afa12ca2110e 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
-obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o
+obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o nested.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index d533bb8851ea..3730d8bbe6dc 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -202,4 +202,8 @@ amd_iommu_make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry
new->data128[1] = 0;
}
+/* NESTED */
+struct iommu_domain *
+amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
+ const struct iommu_user_data *user_data);
#endif /* AMD_IOMMU_H */
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index c34604cf1811..9374e6f7a19d 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -20,6 +20,8 @@
#include <linux/irqreturn.h>
#include <linux/io-pgtable.h>
+#include <uapi/linux/iommufd.h>
+
/*
* Maximum number of IOMMUs supported
*/
@@ -586,6 +588,25 @@ struct pdom_iommu_info {
u32 refcnt; /* Count of attached dev/pasid per domain/IOMMU */
};
+/*
+ * Structure defining one entry in the device table
+ */
+struct dev_table_entry {
+ union {
+ u64 data[4];
+ u128 data128[2];
+ };
+};
+
+/*
+ * Nested domain is specifically used for nested translation
+ */
+struct nested_domain {
+ struct iommu_domain domain; /* generic domain handle used by iommu core code */
+ u16 id; /* the domain id written to the device table */
+ struct iommu_hwpt_amd_guest gdte; /* Guest vIOMMU DTE */
+};
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -895,16 +916,6 @@ extern struct list_head amd_iommu_pci_seg_list;
*/
extern struct list_head amd_iommu_list;
-/*
- * Structure defining one entry in the device table
- */
-struct dev_table_entry {
- union {
- u64 data[4];
- u128 data128[2];
- };
-};
-
/*
* Structure defining one entry in the command buffer
*/
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
new file mode 100644
index 000000000000..0ab5d65ec283
--- /dev/null
+++ b/drivers/iommu/amd/nested.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#define dev_fmt(fmt) "AMD-Vi: " fmt
+
+#include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
+
+#include "amd_iommu.h"
+
+static const struct iommu_domain_ops nested_domain_ops;
+
+static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct nested_domain, domain);
+}
+
+/*
+ * This function is assigned to struct iommufd_viommu_ops.alloc_domain_nested()
+ * during the call to struct iommu_ops.viommu_init().
+ */
+struct iommu_domain *
+amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
+ const struct iommu_user_data *user_data)
+{
+ int ret;
+ struct iommu_hwpt_amd_guest gdte;
+ struct nested_domain *ndom;
+
+ if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ ret = iommu_copy_struct_from_user(&gdte, user_data,
+ IOMMU_HWPT_DATA_AMD_GUEST,
+ dte);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
+ if (!ndom)
+ return ERR_PTR(-ENOMEM);
+
+ ndom->domain.ops = &nested_domain_ops;
+ ndom->domain.type = IOMMU_DOMAIN_NESTED;
+ memcpy(&ndom->gdte, &gdte, sizeof(gdte));
+
+ /*
+ * Normally, when a guest has multiple pass-through devices,
+ * the IOMMU driver setup DTEs with the same stage-2 table and
+ * use the same host domain ID (hDomId). In case of nested translation,
+ * if the guest setup different stage-1 tables with same PASID,
+ * IOMMU would use the same TLB tag. This will results in TLB
+ * aliasing issue.
+ *
+ * Workaround the issue by allocating per-device hDomID for nested
+ * domain (i.e. ndom->id). This require per-device IOMMU TLB invalidation
+ * with corresponded hDomId on the host side when updating stage-2 table.
+ */
+ ndom->id = amd_iommu_pdom_id_alloc();
+ if (ndom->id <= 0) {
+ ret = -ENOSPC;
+ goto out_err;
+ }
+
+ return &ndom->domain;
+out_err:
+ kfree(ndom);
+ return ERR_PTR(ret);
+}
+
+static void nested_domain_free(struct iommu_domain *dom)
+{
+ struct nested_domain *ndom = to_ndomain(dom);
+
+ amd_iommu_pdom_id_free(ndom->id);
+ kfree(ndom);
+}
+
+static const struct iommu_domain_ops nested_domain_ops = {
+ .free = nested_domain_free,
+};
+
--
2.34.1