The nested domain is allocated with IOMMU_DOMAIN_NESTED type to store
stage-1 translation (i.e. GVA->GPA). This includes the GCR3 root pointer
table along with guest page tables. The struct iommu_hwpt_amd_guest
contains this information, and is passed from user-space as a parameter
of the struct iommu_ops.domain_alloc_nested().
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
drivers/iommu/amd/Makefile | 2 +-
drivers/iommu/amd/amd_iommu.h | 5 +++
drivers/iommu/amd/amd_iommu_types.h | 33 +++++++++-----
drivers/iommu/amd/iommu.c | 14 +++++-
drivers/iommu/amd/nested.c | 67 +++++++++++++++++++++++++++++
5 files changed, 109 insertions(+), 12 deletions(-)
create mode 100644 drivers/iommu/amd/nested.c
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 5ae46d99a45b..afa12ca2110e 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
-obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o
+obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o nested.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index d533bb8851ea..cc1f14899dfe 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -8,6 +8,7 @@
#define AMD_IOMMU_H
#include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
#include "amd_iommu_types.h"
@@ -202,4 +203,8 @@ amd_iommu_make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry
new->data128[1] = 0;
}
+/* NESTED */
+struct iommu_domain *
+amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
+ const struct iommu_user_data *user_data);
#endif /* AMD_IOMMU_H */
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index d8c755b2045d..ba27fad77b57 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -20,6 +20,8 @@
#include <linux/irqreturn.h>
#include <linux/io-pgtable.h>
+#include <uapi/linux/iommufd.h>
+
/*
* Maximum number of IOMMUs supported
*/
@@ -604,6 +606,25 @@ struct protection_domain {
struct list_head dev_data_list; /* List of pdom_dev_data */
};
+/*
+ * Structure defining one entry in the device table
+ */
+struct dev_table_entry {
+ union {
+ u64 data[4];
+ u128 data128[2];
+ };
+};
+
+/*
+ * Nested domain is specifically used for nested translation
+ */
+struct nested_domain {
+ struct iommu_domain domain; /* generic domain handle used by iommu core code */
+ u16 id; /* the domain id written to the device table */
+ struct dev_table_entry guest_dte; /* Guest vIOMMU DTE */
+};
+
/*
* This structure contains information about one PCI segment in the system.
*/
@@ -857,6 +878,8 @@ struct iommu_dev_data {
struct list_head list; /* For domain->dev_list */
struct llist_node dev_data_list; /* For global dev_data_list */
struct protection_domain *domain; /* Domain the device is bound to */
+ struct protection_domain *parent; /* Parent Domain the device is bound to */
+ struct nested_domain *ndom; /* Nested Domain the device is bound to */
struct gcr3_tbl_info gcr3_info; /* Per-device GCR3 table */
struct device *dev;
u16 devid; /* PCI Device ID */
@@ -894,16 +917,6 @@ extern struct list_head amd_iommu_pci_seg_list;
*/
extern struct list_head amd_iommu_list;
-/*
- * Structure defining one entry in the device table
- */
-struct dev_table_entry {
- union {
- u64 data[4];
- u128 data128[2];
- };
-};
-
/*
* Structure defining one entry in the command buffer
*/
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index facee0f7a131..c1abb06126c1 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2613,6 +2613,9 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
case IOMMU_HWPT_ALLOC_NEST_PARENT:
case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_NEST_PARENT:
+ {
+ struct iommu_domain *dom;
+
/*
* Allocate domain with v1 page table for dirty tracking
* and/or Nest parent.
@@ -2620,7 +2623,16 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
!amd_iommu_hd_support(iommu))
break;
- return do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
+
+ dom = do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
+ if (!IS_ERR_VALUE(dom) &&
+ (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) {
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+
+ dev_data->parent = to_pdomain(dom);
+ }
+ return dom;
+ }
case IOMMU_HWPT_ALLOC_PASID:
/* Allocate domain with v2 page table if IOMMU supports PASID. */
if (!amd_iommu_pasid_supported())
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
new file mode 100644
index 000000000000..11a0237174bb
--- /dev/null
+++ b/drivers/iommu/amd/nested.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/iommu.h>
+
+#include "amd_iommu.h"
+#include "amd_iommu_types.h"
+
+static const struct iommu_domain_ops nested_domain_ops = {
+ .free = amd_iommu_domain_free,
+};
+
+static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
+{
+ return container_of(dom, struct nested_domain, domain);
+}
+
+struct iommu_domain *
+amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
+ const struct iommu_user_data *user_data)
+{
+ int ret;
+ struct iommu_hwpt_amd_guest gdte;
+ struct nested_domain *ndom;
+
+ if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ /*
+ * Need to make sure size of struct iommu_hwpt_amd_guest and
+ * struct dev_table_entry are the same since it will be copied
+ * from one to the other later on.
+ */
+ if (WARN_ON(sizeof(struct dev_table_entry) != sizeof(gdte)))
+ return ERR_PTR(-EINVAL);
+
+ ret = iommu_copy_struct_from_user(&gdte, user_data,
+ IOMMU_HWPT_DATA_AMD_GUEST,
+ dte);
+ if (ret)
+ return ERR_PTR(ret);
+
+ ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
+ if (IS_ERR(ndom))
+ return ERR_PTR(-ENOMEM);
+
+ ndom->domain.ops = &nested_domain_ops;
+ ndom->domain.type = IOMMU_DOMAIN_NESTED;
+ memcpy(&ndom->guest_dte, &gdte, sizeof(struct dev_table_entry));
+
+ /* Due to possible aliasing issue use per-device nested domain ID */
+ ndom->id = amd_iommu_pdom_id_alloc();
+ if (ndom->id <= 0) {
+ ret = ndom->id;
+ goto out_err;
+ }
+
+ return &ndom->domain;
+out_err:
+ kfree(ndom);
+ return ERR_PTR(ret);
+}
--
2.34.1