[PATCH v2 10/12] iommu/amd: Add support for nested domain allocation

Suravee Suthikulpanit posted 12 patches 4 months, 1 week ago
There is a newer version of this series
[PATCH v2 10/12] iommu/amd: Add support for nested domain allocation
Posted by Suravee Suthikulpanit 4 months, 1 week ago
The nested domain is allocated with IOMMU_DOMAIN_NESTED type to store
stage-1 translation (i.e. GVA->GPA). This includes the GCR3 root pointer
table along with guest page tables. The struct iommu_hwpt_amd_guest
contains this information, and is passed from user-space as a parameter
of the struct iommu_ops.domain_alloc_nested().

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd/Makefile          |  2 +-
 drivers/iommu/amd/amd_iommu.h       |  5 +++
 drivers/iommu/amd/amd_iommu_types.h | 33 +++++++++-----
 drivers/iommu/amd/iommu.c           | 14 +++++-
 drivers/iommu/amd/nested.c          | 67 +++++++++++++++++++++++++++++
 5 files changed, 109 insertions(+), 12 deletions(-)
 create mode 100644 drivers/iommu/amd/nested.c

diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 5ae46d99a45b..afa12ca2110e 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o ppr.o pasid.o
-obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o
+obj-$(CONFIG_AMD_IOMMU_IOMMUFD) += iommufd.o nested.o
 obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index d533bb8851ea..cc1f14899dfe 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -8,6 +8,7 @@
 #define AMD_IOMMU_H
 
 #include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
 
 #include "amd_iommu_types.h"
 
@@ -202,4 +203,8 @@ amd_iommu_make_clear_dte(struct iommu_dev_data *dev_data, struct dev_table_entry
 	new->data128[1] = 0;
 }
 
+/* NESTED */
+struct iommu_domain *
+amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
+			      const struct iommu_user_data *user_data);
 #endif /* AMD_IOMMU_H */
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index d8c755b2045d..ba27fad77b57 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -20,6 +20,8 @@
 #include <linux/irqreturn.h>
 #include <linux/io-pgtable.h>
 
+#include <uapi/linux/iommufd.h>
+
 /*
  * Maximum number of IOMMUs supported
  */
@@ -604,6 +606,25 @@ struct protection_domain {
 	struct list_head dev_data_list; /* List of pdom_dev_data */
 };
 
+/*
+ * Structure defining one entry in the device table
+ */
+struct dev_table_entry {
+	union {
+		u64 data[4];
+		u128 data128[2];
+	};
+};
+
+/*
+ * Nested domain is specifically used for nested translation
+ */
+struct nested_domain {
+	struct iommu_domain domain; /* generic domain handle used by iommu core code */
+	u16 id;	                    /* the domain id written to the device table */
+	struct dev_table_entry guest_dte; /* Guest vIOMMU DTE */
+};
+
 /*
  * This structure contains information about one PCI segment in the system.
  */
@@ -857,6 +878,8 @@ struct iommu_dev_data {
 	struct list_head list;		  /* For domain->dev_list */
 	struct llist_node dev_data_list;  /* For global dev_data_list */
 	struct protection_domain *domain; /* Domain the device is bound to */
+	struct protection_domain *parent; /* Parent Domain the device is bound to */
+	struct nested_domain *ndom;       /* Nested Domain the device is bound to */
 	struct gcr3_tbl_info gcr3_info;   /* Per-device GCR3 table */
 	struct device *dev;
 	u16 devid;			  /* PCI Device ID */
@@ -894,16 +917,6 @@ extern struct list_head amd_iommu_pci_seg_list;
  */
 extern struct list_head amd_iommu_list;
 
-/*
- * Structure defining one entry in the device table
- */
-struct dev_table_entry {
-	union {
-		u64 data[4];
-		u128 data128[2];
-	};
-};
-
 /*
  * Structure defining one entry in the command buffer
  */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index facee0f7a131..c1abb06126c1 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2613,6 +2613,9 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
 	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING:
 	case IOMMU_HWPT_ALLOC_NEST_PARENT:
 	case IOMMU_HWPT_ALLOC_DIRTY_TRACKING | IOMMU_HWPT_ALLOC_NEST_PARENT:
+	{
+		struct iommu_domain *dom;
+
 		/*
 		 * Allocate domain with v1 page table for dirty tracking
 		 * and/or Nest parent.
@@ -2620,7 +2623,16 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
 		if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
 		    !amd_iommu_hd_support(iommu))
 			break;
-		return do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
+
+		dom = do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
+		if (!IS_ERR_VALUE(dom) &&
+		    (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) {
+			struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+
+			dev_data->parent = to_pdomain(dom);
+		}
+		return dom;
+	}
 	case IOMMU_HWPT_ALLOC_PASID:
 		/* Allocate domain with v2 page table if IOMMU supports PASID. */
 		if (!amd_iommu_pasid_supported())
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
new file mode 100644
index 000000000000..11a0237174bb
--- /dev/null
+++ b/drivers/iommu/amd/nested.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 Advanced Micro Devices, Inc.
+ */
+
+#define pr_fmt(fmt)     "AMD-Vi: " fmt
+#define dev_fmt(fmt)    pr_fmt(fmt)
+
+#include <linux/iommu.h>
+
+#include "amd_iommu.h"
+#include "amd_iommu_types.h"
+
+static const struct iommu_domain_ops nested_domain_ops = {
+	.free = amd_iommu_domain_free,
+};
+
+static inline struct nested_domain *to_ndomain(struct iommu_domain *dom)
+{
+	return container_of(dom, struct nested_domain, domain);
+}
+
+struct iommu_domain *
+amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
+			      const struct iommu_user_data *user_data)
+{
+	int ret;
+	struct iommu_hwpt_amd_guest gdte;
+	struct nested_domain *ndom;
+
+	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	/*
+	 * Need to make sure size of struct iommu_hwpt_amd_guest and
+	 * struct dev_table_entry are the same since it will be copied
+	 * from one to the other later on.
+	 */
+	if (WARN_ON(sizeof(struct dev_table_entry) != sizeof(gdte)))
+		return ERR_PTR(-EINVAL);
+
+	ret = iommu_copy_struct_from_user(&gdte, user_data,
+					  IOMMU_HWPT_DATA_AMD_GUEST,
+					  dte);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
+	if (IS_ERR(ndom))
+		return ERR_PTR(-ENOMEM);
+
+	ndom->domain.ops = &nested_domain_ops;
+	ndom->domain.type = IOMMU_DOMAIN_NESTED;
+	memcpy(&ndom->guest_dte, &gdte, sizeof(struct dev_table_entry));
+
+	/* Due to possible aliasing issue use per-device nested domain ID */
+	ndom->id = amd_iommu_pdom_id_alloc();
+	if (ndom->id <= 0) {
+		ret = ndom->id;
+		goto out_err;
+	}
+
+	return &ndom->domain;
+out_err:
+	kfree(ndom);
+	return ERR_PTR(ret);
+}
-- 
2.34.1
Re: [PATCH v2 10/12] iommu/amd: Add support for nested domain allocation
Posted by Jason Gunthorpe 4 months ago
On Wed, Oct 01, 2025 at 06:09:52AM +0000, Suravee Suthikulpanit wrote:

> @@ -857,6 +878,8 @@ struct iommu_dev_data {
>  	struct list_head list;		  /* For domain->dev_list */
>  	struct llist_node dev_data_list;  /* For global dev_data_list */
>  	struct protection_domain *domain; /* Domain the device is bound to */
> +	struct protection_domain *parent; /* Parent Domain the device is bound to */
> +	struct nested_domain *ndom;       /* Nested Domain the device is bound to */

These two should never be needed. domain should point to the nested
domain and it can get the other information from it.

> @@ -2620,7 +2623,16 @@ amd_iommu_domain_alloc_paging_flags(struct device *dev, u32 flags,
>  		if ((flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING) &&
>  		    !amd_iommu_hd_support(iommu))
>  			break;
> -		return do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
> +
> +		dom = do_iommu_domain_alloc(dev, flags, PD_MODE_V1);
> +		if (!IS_ERR_VALUE(dom) &&
> +		    (flags & IOMMU_HWPT_ALLOC_NEST_PARENT)) {
> +			struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
> +
> +			dev_data->parent = to_pdomain(dom);

We talked about this many times already. Domain allocation DOES NOT
CHANGE the dev_data.

> +#define pr_fmt(fmt)     "AMD-Vi: " fmt
> +#define dev_fmt(fmt)    pr_fmt(fmt)

I'm not sure these make sense, you should not be using pr_fmt, use a
dev_XX on the iommu instance.

> +struct iommu_domain *
> +amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
> +			      const struct iommu_user_data *user_data)
> +{
> +	int ret;
> +	struct iommu_hwpt_amd_guest gdte;
> +	struct nested_domain *ndom;
> +
> +	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
> +		return ERR_PTR(-EOPNOTSUPP);
> +
> +	/*
> +	 * Need to make sure size of struct iommu_hwpt_amd_guest and
> +	 * struct dev_table_entry are the same since it will be copied
> +	 * from one to the other later on.
> +	 */
> +	if (WARN_ON(sizeof(struct dev_table_entry) != sizeof(gdte)))
> +		return ERR_PTR(-EINVAL);

use static_assert for something like this.

> +
> +	ret = iommu_copy_struct_from_user(&gdte, user_data,
> +					  IOMMU_HWPT_DATA_AMD_GUEST,
> +					  dte);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
> +	if (IS_ERR(ndom))
> +		return ERR_PTR(-ENOMEM);
> +
> +	ndom->domain.ops = &nested_domain_ops;
> +	ndom->domain.type = IOMMU_DOMAIN_NESTED;
> +	memcpy(&ndom->guest_dte, &gdte, sizeof(struct dev_table_entry));
> +
> +	/* Due to possible aliasing issue use per-device nested domain ID */
> +	ndom->id = amd_iommu_pdom_id_alloc();

I've forgotten the details, but doesn't the gdet have the virtual
domain ID in side it? Shouldn't this be going to the viommu struct and
mapping virtual domain IDs to physical ones so they can be shared if
the guest says it is safe to share them? I guess that is an
optimization, but it should have a note here explaining it.

Jason
Re: [PATCH v2 10/12] iommu/amd: Add support for nested domain allocation
Posted by Suthikulpanit, Suravee 4 months ago

On 10/6/2025 9:49 AM, Jason Gunthorpe wrote:
> On Wed, Oct 01, 2025 at 06:09:52AM +0000, Suravee Suthikulpanit wrote:
> ....
>> +
>> +	ret = iommu_copy_struct_from_user(&gdte, user_data,
>> +					  IOMMU_HWPT_DATA_AMD_GUEST,
>> +					  dte);
>> +	if (ret)
>> +		return ERR_PTR(ret);
>> +
>> +	ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
>> +	if (IS_ERR(ndom))
>> +		return ERR_PTR(-ENOMEM);
>> +
>> +	ndom->domain.ops = &nested_domain_ops;
>> +	ndom->domain.type = IOMMU_DOMAIN_NESTED;
>> +	memcpy(&ndom->guest_dte, &gdte, sizeof(struct dev_table_entry));
>> +
>> +	/* Due to possible aliasing issue use per-device nested domain ID */
>> +	ndom->id = amd_iommu_pdom_id_alloc();
> 
> I've forgotten the details, but doesn't the gdet have the virtual
> domain ID in side it? Shouldn't this be going to the viommu struct and
> mapping virtual domain IDs to physical ones so they can be shared if
> the guest says it is safe to share them? I guess that is an
> optimization, but it should have a note here explaining it.

The gDTE[DomainID] field contains guest Domain ID (gDomID). The host 
IOMMU driver uses the gDomId and guest ID (gid) to index the Domain ID 
mapping table, and store the host Domain ID (hDomID) in the table entry. 
This data structure is required by hw to translation gDomID->hDomID to 
virtualize guest invalidation command. This will be part of the upcoming 
series to enable hw-vIOMMU.

This ndom->id is the hDomID, which is currently allocated per-device to 
avoid TLB aliasing i.e. A guest w/ multiple pass-through devices w/ the 
same hDomID (same stage 2 table) and different stage-1 tables with same 
PASID. IOMMU would use the same TLB tag, which results in TLB aliasing 
issue. Therefore, we workaround the issue by allocating per-device 
hDomID for nested domain.

Thanks,
Suravee



Thanks,
Suravee
Re: [PATCH v2 10/12] iommu/amd: Add support for nested domain allocation
Posted by Jason Gunthorpe 4 months ago
On Tue, Oct 07, 2025 at 03:36:58PM -0500, Suthikulpanit, Suravee wrote:
> The gDTE[DomainID] field contains guest Domain ID (gDomID). The host IOMMU
> driver uses the gDomId and guest ID (gid) to index the Domain ID mapping
> table, and store the host Domain ID (hDomID) in the table entry. This data
> structure is required by hw to translation gDomID->hDomID to virtualize
> guest invalidation command. This will be part of the upcoming series to
> enable hw-vIOMMU.

Sure, this translation is part of viommu

> This ndom->id is the hDomID, which is currently allocated per-device to
> avoid TLB aliasing i.e. A guest w/ multiple pass-through devices w/ the same
> hDomID (same stage 2 table) and different stage-1 tables with same PASID.
> IOMMU would use the same TLB tag, which results in TLB aliasing issue.
> Therefore, we workaround the issue by allocating per-device hDomID for
> nested domain.

But this is what I mean here, the gDomId should be 1:1 with the hDomId
and here you are making it 1:N.

It has to be like this or you cannot manage invalidation.

Given this series is not really functional it is OK to leave a little
hack I guess, but it is worth noting how it is supposed to work.

It also probably means we should see the viommu series pretty quickly
with a goal to merge them all together in one cycle.

Jason
Re: [PATCH v2 10/12] iommu/amd: Add support for nested domain allocation
Posted by Sairaj Kodilkar 4 months ago

On 10/8/2025 5:09 AM, Jason Gunthorpe wrote:
> On Tue, Oct 07, 2025 at 03:36:58PM -0500, Suthikulpanit, Suravee wrote:
>> The gDTE[DomainID] field contains guest Domain ID (gDomID). The host IOMMU
>> driver uses the gDomId and guest ID (gid) to index the Domain ID mapping
>> table, and store the host Domain ID (hDomID) in the table entry. This data
>> structure is required by hw to translation gDomID->hDomID to virtualize
>> guest invalidation command. This will be part of the upcoming series to
>> enable hw-vIOMMU.
> Sure, this translation is part of viommu
>
>> This ndom->id is the hDomID, which is currently allocated per-device to
>> avoid TLB aliasing i.e. A guest w/ multiple pass-through devices w/ the same
>> hDomID (same stage 2 table) and different stage-1 tables with same PASID.
>> IOMMU would use the same TLB tag, which results in TLB aliasing issue.
>> Therefore, we workaround the issue by allocating per-device hDomID for
>> nested domain.
> But this is what I mean here, the gDomId should be 1:1 with the hDomId
> and here you are making it 1:N.
Hi Jason,
The guest will only see V2 page table when we are using hardware vIOMMU.
Since IOMMU driver allocates per device domains when it is using V2 page 
table,
the mappings are still N:N and invalidations will work similar to V2 
page table mode in
host.

Thanks
Sairaj
>
> It has to be like this or you cannot manage invalidation.
>
> Given this series is not really functional it is OK to leave a little
> hack I guess, but it is worth noting how it is supposed to work.
>
> It also probably means we should see the viommu series pretty quickly
> with a goal to merge them all together in one cycle.
>
> Jason
Re: [PATCH v2 10/12] iommu/amd: Add support for nested domain allocation
Posted by Jason Gunthorpe 4 months ago
On Thu, Oct 09, 2025 at 11:52:23AM +0530, Sairaj Kodilkar wrote:
> 
> 
> On 10/8/2025 5:09 AM, Jason Gunthorpe wrote:
> > On Tue, Oct 07, 2025 at 03:36:58PM -0500, Suthikulpanit, Suravee wrote:
> > > The gDTE[DomainID] field contains guest Domain ID (gDomID). The host IOMMU
> > > driver uses the gDomId and guest ID (gid) to index the Domain ID mapping
> > > table, and store the host Domain ID (hDomID) in the table entry. This data
> > > structure is required by hw to translation gDomID->hDomID to virtualize
> > > guest invalidation command. This will be part of the upcoming series to
> > > enable hw-vIOMMU.
> > Sure, this translation is part of viommu
> > 
> > > This ndom->id is the hDomID, which is currently allocated per-device to
> > > avoid TLB aliasing i.e. A guest w/ multiple pass-through devices w/ the same
> > > hDomID (same stage 2 table) and different stage-1 tables with same PASID.
> > > IOMMU would use the same TLB tag, which results in TLB aliasing issue.
> > > Therefore, we workaround the issue by allocating per-device hDomID for
> > > nested domain.
> > But this is what I mean here, the gDomId should be 1:1 with the hDomId
> > and here you are making it 1:N.
> Hi Jason,
> The guest will only see V2 page table when we are using hardware vIOMMU.

??

This patch is about adding the gDTE support to the driver and the GDTE
is the mechanism for userspace to inform the kernel about the V2 page
table in the guest.

If the idea at this point is to not support V2 page table then have
this function validate the gDTE to exclude it.

> Since IOMMU driver allocates per device domains when it is using V2
> page table, the mappings are still N:N and invalidations will work
> similar to V2 page table mode in host.

I don't see how this can work. Invalidations will be pushed by the
guest kernel directly to the HW invalidation queue using the
gDOMID. That must translate to a single hDOMID to invalidate the right
stuff.

If there is a hDOMID per device it cannot work unless the guest is
also making per-device IDs.

But we can't make this assumption in the viommu code.

So you must not do this, the gDOMID must be mapped to exactly one
hDOMID, and the viommu object should be managing this. When attaching
a gDTE the kernel should validate that the gDOMID maps to a hDOMID
that has the same V1 page table.

Jason
Re: [PATCH v2 10/12] iommu/amd: Add support for nested domain allocation
Posted by Sairaj Kodilkar 4 months ago

On 10/9/2025 11:52 AM, Sairaj Kodilkar wrote:
>
>
> On 10/8/2025 5:09 AM, Jason Gunthorpe wrote:
>> On Tue, Oct 07, 2025 at 03:36:58PM -0500, Suthikulpanit, Suravee wrote:
>>> The gDTE[DomainID] field contains guest Domain ID (gDomID). The host 
>>> IOMMU
>>> driver uses the gDomId and guest ID (gid) to index the Domain ID 
>>> mapping
>>> table, and store the host Domain ID (hDomID) in the table entry. 
>>> This data
>>> structure is required by hw to translation gDomID->hDomID to virtualize
>>> guest invalidation command. This will be part of the upcoming series to
>>> enable hw-vIOMMU.
>> Sure, this translation is part of viommu
>>
>>> This ndom->id is the hDomID, which is currently allocated per-device to
>>> avoid TLB aliasing i.e. A guest w/ multiple pass-through devices w/ 
>>> the same
>>> hDomID (same stage 2 table) and different stage-1 tables with same 
>>> PASID.
>>> IOMMU would use the same TLB tag, which results in TLB aliasing issue.
>>> Therefore, we workaround the issue by allocating per-device hDomID for
>>> nested domain.
>> But this is what I mean here, the gDomId should be 1:1 with the hDomId
>> and here you are making it 1:N.
> Hi Jason,
> The guest will only see V2 page table when we are using hardware vIOMMU.
> Since IOMMU driver allocates per device domains when it is using V2 
> page table,
> the mappings are still N:N and invalidations will work similar to V2 
> page table mode in
> host.
Small correction --> "mappings are still 1:1" because guest IOMMU driver 
allocates per device domain.

Thanks
sairaj
Re: [PATCH v2 10/12] iommu/amd: Add support for nested domain allocation
Posted by Nicolin Chen 4 months, 1 week ago
On Wed, Oct 01, 2025 at 06:09:52AM +0000, Suravee Suthikulpanit wrote:
> The nested domain is allocated with IOMMU_DOMAIN_NESTED type to store
> stage-1 translation (i.e. GVA->GPA). This includes the GCR3 root pointer
> table along with guest page tables. The struct iommu_hwpt_amd_guest
> contains this information, and is passed from user-space as a parameter
> of the struct iommu_ops.domain_alloc_nested().
> 
> Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>

> diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
> index d533bb8851ea..cc1f14899dfe 100644
> --- a/drivers/iommu/amd/amd_iommu.h
> +++ b/drivers/iommu/amd/amd_iommu.h
> @@ -8,6 +8,7 @@
>  #define AMD_IOMMU_H
>  
>  #include <linux/iommu.h>
> +#include <uapi/linux/iommufd.h>
>  
>  #include "amd_iommu_types.h"

amd_iommu_types.h is adding the uAPI header in this patch too. So,
this new "include" here is redundant.

> +++ b/drivers/iommu/amd/nested.c
> @@ -0,0 +1,67 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2025 Advanced Micro Devices, Inc.
> + */
> +
> +#define pr_fmt(fmt)     "AMD-Vi: " fmt
> +#define dev_fmt(fmt)    pr_fmt(fmt)

#define pr_fmt(fmt)     "AMD-Vi: " fmt
#define dev_fmt		pr_fmt

> +
> +#include <linux/iommu.h>
> +
> +#include "amd_iommu.h"
> +#include "amd_iommu_types.h"

amd_iommu.h covers both linux/iommu.h and amd_iommu_types.h.

> +struct iommu_domain *
> +amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
> +			      const struct iommu_user_data *user_data)
> +{
> +	int ret;
> +	struct iommu_hwpt_amd_guest gdte;
> +	struct nested_domain *ndom;
> +
> +	if (user_data->type != IOMMU_HWPT_DATA_AMD_GUEST)
> +		return ERR_PTR(-EOPNOTSUPP);
> +
> +	/*
> +	 * Need to make sure size of struct iommu_hwpt_amd_guest and
> +	 * struct dev_table_entry are the same since it will be copied
> +	 * from one to the other later on.
> +	 */
> +	if (WARN_ON(sizeof(struct dev_table_entry) != sizeof(gdte)))
> +		return ERR_PTR(-EINVAL);

	static_assert(sizeof(struct dev_table_entry) == sizeof(gdte));

> +
> +	ret = iommu_copy_struct_from_user(&gdte, user_data,
> +					  IOMMU_HWPT_DATA_AMD_GUEST,
> +					  dte);
> +	if (ret)
> +		return ERR_PTR(ret);
> +
> +	ndom = kzalloc(sizeof(*ndom), GFP_KERNEL);
> +	if (IS_ERR(ndom))
> +		return ERR_PTR(-ENOMEM);

	if (!ndom)
		return ERR_PTR(-ENOMEM);

> +
> +	ndom->domain.ops = &nested_domain_ops;
> +	ndom->domain.type = IOMMU_DOMAIN_NESTED;
> +	memcpy(&ndom->guest_dte, &gdte, sizeof(struct dev_table_entry));
> +
> +	/* Due to possible aliasing issue use per-device nested domain ID */
> +	ndom->id = amd_iommu_pdom_id_alloc();
> +	if (ndom->id <= 0) {
> +		ret = ndom->id;
> +		goto out_err;

if ndom->id = 0, ret = 0, meaning this function does not fail..

Given that ida_alloc_range has the range [1, MAX_DOMAIN_ID - 1],
0 should never be returned. So just check if (ndom->id < 0) will
be enough.

Or you could override the "ret" entirely like other places do:

	if (ndom->id <= 0) {
		ret = -ENOSPC;
		goto out_err;
	}

With all these fixed,

Reviewed-by: Nicolin Chen <nicolinc@nvidia.com>