[PATCH v5 13/14] iommu/amd: Refactor logic to program the host page table in DTE

Suravee Suthikulpanit posted 14 patches 1 month ago
[PATCH v5 13/14] iommu/amd: Refactor logic to program the host page table in DTE
Posted by Suravee Suthikulpanit 1 month ago
Introduce the amd_iommu_set_dte_v1() helper function to configure
IOMMU host (v1) page table into DTE.

Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd/amd_iommu.h       |  5 ++
 drivers/iommu/amd/amd_iommu_types.h |  1 +
 drivers/iommu/amd/iommu.c           | 94 ++++++++++++++++-------------
 3 files changed, 59 insertions(+), 41 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index ebedb49c28cf..df167ae9e4aa 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -187,6 +187,11 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
 struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
 struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid);
 
+void amd_iommu_set_dte_v1(struct iommu_dev_data *dev_data,
+			  struct protection_domain *domain,
+			  u16 domid, struct pt_iommu_amdv1_hw_info *pt_info,
+			  struct dev_table_entry *new);
+
 void amd_iommu_update_dte(struct amd_iommu *iommu,
 			  struct iommu_dev_data *dev_data,
 			  struct dev_table_entry *new);
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 734f6a753b3a..19a98c6490cb 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -352,6 +352,7 @@
 #define DTE_FLAG_HAD	(3ULL << 7)
 #define DTE_MODE_MASK	GENMASK_ULL(11, 9)
 #define DTE_HOST_TRP	GENMASK_ULL(51, 12)
+#define DTE_FLAG_PPR	BIT_ULL(52)
 #define DTE_FLAG_GIOV	BIT_ULL(54)
 #define DTE_FLAG_GV	BIT_ULL(55)
 #define DTE_GLX		GENMASK_ULL(57, 56)
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 24bab275e8c0..d392b6757f2a 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2030,36 +2030,56 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid)
  * Note:
  * The old value for GCR3 table and GPT have been cleared from caller.
  */
-static void set_dte_gcr3_table(struct amd_iommu *iommu,
-			       struct iommu_dev_data *dev_data,
-			       struct dev_table_entry *target)
+static void set_dte_gcr3_table(struct iommu_dev_data *dev_data,
+			       struct dev_table_entry *new)
 {
 	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
-	u64 gcr3;
+	u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
 
-	if (!gcr3_info->gcr3_tbl)
-		return;
-
-	pr_debug("%s: devid=%#x, glx=%#x, gcr3_tbl=%#llx\n",
-		 __func__, dev_data->devid, gcr3_info->glx,
-		 (unsigned long long)gcr3_info->gcr3_tbl);
-
-	gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
+	new->data[0] |= DTE_FLAG_TV |
+			(dev_data->ppr ? DTE_FLAG_PPR : 0) |
+			(pdom_is_v2_pgtbl_mode(dev_data->domain) ?  DTE_FLAG_GIOV : 0) |
+			DTE_FLAG_GV |
+			FIELD_PREP(DTE_GLX, gcr3_info->glx) |
+			FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12) |
+			DTE_FLAG_IR | DTE_FLAG_IW;
 
-	target->data[0] |= DTE_FLAG_GV |
-			   FIELD_PREP(DTE_GLX, gcr3_info->glx) |
-			   FIELD_PREP(DTE_GCR3_14_12, gcr3 >> 12);
-	if (pdom_is_v2_pgtbl_mode(dev_data->domain))
-		target->data[0] |= DTE_FLAG_GIOV;
-
-	target->data[1] |= FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
-			   FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);
+	new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, dev_data->gcr3_info.domid) |
+			FIELD_PREP(DTE_GCR3_30_15, gcr3 >> 15) |
+			(dev_data->ats_enabled ? DTE_FLAG_IOTLB : 0) |
+			FIELD_PREP(DTE_GCR3_51_31, gcr3 >> 31);
 
 	/* Guest page table can only support 4 and 5 levels  */
 	if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL)
-		target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);
+		new->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_5_LEVEL);
 	else
-		target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
+		new->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
+}
+
+void amd_iommu_set_dte_v1(struct iommu_dev_data *dev_data,
+			  struct protection_domain *domain,
+			  u16 domid, struct pt_iommu_amdv1_hw_info *pt_info,
+			  struct dev_table_entry *new)
+{
+	/* Note Dirty tracking is used for v1 table only for now */
+	new->data[0] |= DTE_FLAG_TV |
+			FIELD_PREP(DTE_MODE_MASK, pt_info->mode) |
+			(domain->dirty_tracking ? DTE_FLAG_HAD : 0) |
+			FIELD_PREP(DTE_HOST_TRP, pt_info->host_pt_root >> 12) |
+			DTE_FLAG_IR | DTE_FLAG_IW;
+
+	new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, domid) |
+			(dev_data->ats_enabled ? DTE_FLAG_IOTLB : 0);
+}
+
+static void set_dte_passthrough(struct iommu_dev_data *dev_data,
+				struct protection_domain *domain,
+				struct dev_table_entry *new)
+{
+	new->data[0] |= DTE_FLAG_TV | DTE_FLAG_IR | DTE_FLAG_IW;
+
+	new->data[1] |= FIELD_PREP(DTE_DOMID_MASK, domain->id) |
+			(dev_data->ats_enabled) ? DTE_FLAG_IOTLB : 0;
 }
 
 static void set_dte_entry(struct amd_iommu *iommu,
@@ -2074,8 +2094,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
 	struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
 	struct pt_iommu_amdv1_hw_info pt_info;
 
-	amd_iommu_make_clear_dte(dev_data, &new);
-
 	if (gcr3_info && gcr3_info->gcr3_tbl)
 		domid = dev_data->gcr3_info.domid;
 	else {
@@ -2097,35 +2115,29 @@ static void set_dte_entry(struct amd_iommu *iommu,
 						       &pt_info);
 			}
 
-			new.data[0] |= __sme_set(pt_info.host_pt_root) |
-				       (pt_info.mode & DEV_ENTRY_MODE_MASK)
-					       << DEV_ENTRY_MODE_SHIFT;
+			pt_info.host_pt_root = __sme_set(pt_info.host_pt_root);
 		}
 	}
 
-	new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW;
-
 	/*
 	 * When SNP is enabled, we can only support TV=1 with non-zero domain ID.
 	 * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in
 	 * do_iommu_domain_alloc().
 	 */
 	WARN_ON(amd_iommu_snp_en && (domid == 0));
-	new.data[0] |= DTE_FLAG_TV;
-
-	if (dev_data->ppr)
-		new.data[0] |= 1ULL << DEV_ENTRY_PPR;
-
-	if (domain->dirty_tracking)
-		new.data[0] |= DTE_FLAG_HAD;
 
-	if (dev_data->ats_enabled)
-		new.data[1] |= DTE_FLAG_IOTLB;
+	amd_iommu_make_clear_dte(dev_data, &new);
 
 	old_domid = READ_ONCE(dte->data[1]) & DTE_DOMID_MASK;
-	new.data[1] |= domid;
-
-	set_dte_gcr3_table(iommu, dev_data, &new);
+	if (gcr3_info && gcr3_info->gcr3_tbl)
+		set_dte_gcr3_table(dev_data, &new);
+	else if (domain->domain.type == IOMMU_DOMAIN_IDENTITY)
+		set_dte_passthrough(dev_data, domain, &new);
+	else if (domain->domain.type & __IOMMU_DOMAIN_PAGING &&
+		 domain->pd_mode == PD_MODE_V1)
+		amd_iommu_set_dte_v1(dev_data, domain, domid, &pt_info, &new);
+	else
+		WARN_ON(true);
 
 	amd_iommu_update_dte(iommu, dev_data, &new);
 
-- 
2.34.1
Re: [PATCH v5 13/14] iommu/amd: Refactor logic to program the host page table in DTE
Posted by Jason Gunthorpe 4 weeks ago
On Wed, Nov 12, 2025 at 06:25:05PM +0000, Suravee Suthikulpanit wrote:

> @@ -2097,35 +2115,29 @@ static void set_dte_entry(struct amd_iommu *iommu,
>  	old_domid = READ_ONCE(dte->data[1]) & DTE_DOMID_MASK;
> -	new.data[1] |= domid;
> -
> -	set_dte_gcr3_table(iommu, dev_data, &new);
> +	if (gcr3_info && gcr3_info->gcr3_tbl)
> +		set_dte_gcr3_table(dev_data, &new);
> +	else if (domain->domain.type == IOMMU_DOMAIN_IDENTITY)
> +		set_dte_passthrough(dev_data, domain, &new);
> +	else if (domain->domain.type & __IOMMU_DOMAIN_PAGING &&

() around the & is kernel style I think

Otherwise this looks OK

Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>

In some future the set_dte_xx should be called from the domain attach
function that knows what kind of domain it is attaching already - ie
identity, v1, v2 all should have different attach functions.

Jason
Re: [PATCH v5 13/14] iommu/amd: Refactor logic to program the host page table in DTE
Posted by Nicolin Chen 1 month ago
On Wed, Nov 12, 2025 at 06:25:05PM +0000, Suravee Suthikulpanit wrote:
>  static void set_dte_entry(struct amd_iommu *iommu,
> @@ -2074,8 +2094,6 @@ static void set_dte_entry(struct amd_iommu *iommu,
>  	struct dev_table_entry *dte = &get_dev_table(iommu)[dev_data->devid];
>  	struct pt_iommu_amdv1_hw_info pt_info;
>  
> -	amd_iommu_make_clear_dte(dev_data, &new);
> -
>  	if (gcr3_info && gcr3_info->gcr3_tbl)
>  		domid = dev_data->gcr3_info.domid;

set_dte_gcr3_table() now always get the domid from this link, so
it's unnecessary.

>  	else {
> @@ -2097,35 +2115,29 @@ static void set_dte_entry(struct amd_iommu *iommu,
>  						       &pt_info);
>  			}
>  
> -			new.data[0] |= __sme_set(pt_info.host_pt_root) |
> -				       (pt_info.mode & DEV_ENTRY_MODE_MASK)
> -					       << DEV_ENTRY_MODE_SHIFT;
> +			pt_info.host_pt_root = __sme_set(pt_info.host_pt_root);
>  		}
>  	}

And this __IOMMU_DOMAIN_PAGING path seems to be used by v1 only.
So, it could be squashed into amd_iommu_set_dte_v1(). This could
tidy set_dte_entry() further.

>  
> -	new.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW;
> -
>  	/*
>  	 * When SNP is enabled, we can only support TV=1 with non-zero domain ID.
>  	 * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in
>  	 * do_iommu_domain_alloc().
>  	 */
>  	WARN_ON(amd_iommu_snp_en && (domid == 0));

This, if it's very necessary, can go into individual functions.

Though I am not sure if it is accurate as it seems to imply that
IOMMU_DOMAIN_IDENTITY has domid == 0?

But an IOMMU_DOMAIN_IDENTITY does:
	identity_domain.id = amd_iommu_pdom_id_alloc();
doing:
	ida_alloc_range(&pdom_ids, 1, MAX_DOMAIN_ID - 1, GFP_ATOMIC);
which means it never returns 0.

Nicolin
Re: [PATCH v5 13/14] iommu/amd: Refactor logic to program the host page table in DTE
Posted by Jason Gunthorpe 4 weeks ago
On Thu, Nov 13, 2025 at 01:19:13PM -0800, Nicolin Chen wrote:

> >  	else {
> > @@ -2097,35 +2115,29 @@ static void set_dte_entry(struct amd_iommu *iommu,
> >  						       &pt_info);
> >  			}
> >  
> > -			new.data[0] |= __sme_set(pt_info.host_pt_root) |
> > -				       (pt_info.mode & DEV_ENTRY_MODE_MASK)
> > -					       << DEV_ENTRY_MODE_SHIFT;
> > +			pt_info.host_pt_root = __sme_set(pt_info.host_pt_root);
> >  		}
> >  	}
> 
> And this __IOMMU_DOMAIN_PAGING path seems to be used by v1 only.
> So, it could be squashed into amd_iommu_set_dte_v1(). This could
> tidy set_dte_entry() further.

Oh yes, definately should be done, the whole pt info grab for v1
belongs inside the set v1 function.

> >  	/*
> >  	 * When SNP is enabled, we can only support TV=1 with non-zero domain ID.
> >  	 * This is prevented by the SNP-enable and IOMMU_DOMAIN_IDENTITY check in
> >  	 * do_iommu_domain_alloc().
> >  	 */
> >  	WARN_ON(amd_iommu_snp_en && (domid == 0));
> 
> This, if it's very necessary, can go into individual functions.
>
> Though I am not sure if it is accurate as it seems to imply that
> IOMMU_DOMAIN_IDENTITY has domid == 0?

I never quite understood this comment myself either :\

> But an IOMMU_DOMAIN_IDENTITY does:
> 	identity_domain.id = amd_iommu_pdom_id_alloc();
> doing:
> 	ida_alloc_range(&pdom_ids, 1, MAX_DOMAIN_ID - 1, GFP_ATOMIC);
> which means it never returns 0.

Hmm!

Jason
Re: [PATCH v5 13/14] iommu/amd: Refactor logic to program the host page table in DTE
Posted by Nicolin Chen 1 month ago
On Thu, Nov 13, 2025 at 01:19:16PM -0800, Nicolin Chen wrote:
> On Wed, Nov 12, 2025 at 06:25:05PM +0000, Suravee Suthikulpanit wrote:
> >  	else {
> > @@ -2097,35 +2115,29 @@ static void set_dte_entry(struct amd_iommu *iommu,
> >  						       &pt_info);
> >  			}
> >  
> > -			new.data[0] |= __sme_set(pt_info.host_pt_root) |
> > -				       (pt_info.mode & DEV_ENTRY_MODE_MASK)
> > -					       << DEV_ENTRY_MODE_SHIFT;
> > +			pt_info.host_pt_root = __sme_set(pt_info.host_pt_root);
> >  		}
> >  	}
> 
> And this __IOMMU_DOMAIN_PAGING path seems to be used by v1 only.
> So, it could be squashed into amd_iommu_set_dte_v1(). This could
> tidy set_dte_entry() further.

Having looked at PATCH-14, I realized that amd_iommu_set_dte_v1()
is shared with the nesting pathway.

So perhaps:
	else if (domain->domain.type & __IOMMU_DOMAIN_PAGING &&
		 domain->pd_mode == PD_MODE_V1) {
		struct pt_iommu_amdv1_hw_info pt_info;

		....; // <--move here
		amd_iommu_set_dte_v1(dev_data, domain, domid, &pt_info, &new)
	} else

Nicolin
Re: [PATCH v5 13/14] iommu/amd: Refactor logic to program the host page table in DTE
Posted by Jason Gunthorpe 4 weeks ago
On Thu, Nov 13, 2025 at 01:29:39PM -0800, Nicolin Chen wrote:
> On Thu, Nov 13, 2025 at 01:19:16PM -0800, Nicolin Chen wrote:
> > On Wed, Nov 12, 2025 at 06:25:05PM +0000, Suravee Suthikulpanit wrote:
> > >  	else {
> > > @@ -2097,35 +2115,29 @@ static void set_dte_entry(struct amd_iommu *iommu,
> > >  						       &pt_info);
> > >  			}
> > >  
> > > -			new.data[0] |= __sme_set(pt_info.host_pt_root) |
> > > -				       (pt_info.mode & DEV_ENTRY_MODE_MASK)
> > > -					       << DEV_ENTRY_MODE_SHIFT;
> > > +			pt_info.host_pt_root = __sme_set(pt_info.host_pt_root);
> > >  		}
> > >  	}
> > 
> > And this __IOMMU_DOMAIN_PAGING path seems to be used by v1 only.
> > So, it could be squashed into amd_iommu_set_dte_v1(). This could
> > tidy set_dte_entry() further.
> 
> Having looked at PATCH-14, I realized that amd_iommu_set_dte_v1()
> is shared with the nesting pathway.
> 
> So perhaps:
> 	else if (domain->domain.type & __IOMMU_DOMAIN_PAGING &&
> 		 domain->pd_mode == PD_MODE_V1) {
> 		struct pt_iommu_amdv1_hw_info pt_info;
> 
> 		....; // <--move here
> 		amd_iommu_set_dte_v1(dev_data, domain, domid, &pt_info, &new)
> 	} else

Even so it should be called from the set functions and the set
functions should sort it out, if another helper is needed to share
with nesting then fine, but you might also just have nesting call the
set_dte_v1 to start and then modify in the gcr3 table..

Jason