[RFCv2 PATCH 2/7] iommu/amd: Refactor set_dte_entry

Suravee Suthikulpanit posted 7 patches 1 year, 11 months ago
[RFCv2 PATCH 2/7] iommu/amd: Refactor set_dte_entry
Posted by Suravee Suthikulpanit 1 year, 11 months ago
Separate logic for setting DTE[GCR3 Root Pointer Table] into a helper
function set_dte_gcr3_table() to prepare for adding nested domain support.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
---
 drivers/iommu/amd/iommu.c | 124 ++++++++++++++++++++++----------------
 1 file changed, 72 insertions(+), 52 deletions(-)

diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b9759f6d8be2..71099e5fbaee 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1917,89 +1917,109 @@ int amd_iommu_clear_gcr3(struct iommu_dev_data *dev_data, ioasid_t pasid)
 	return ret;
 }
 
+static void set_dte_gcr3_table(struct amd_iommu *iommu,
+			       struct iommu_dev_data *dev_data,
+			       struct dev_table_entry *target)
+{
+	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+	int devid = dev_data->devid;
+	u64 tmp, gcr3 = 0;
+
+	if (!gcr3_info || !gcr3_info->gcr3_tbl)
+		return;
+
+	pr_debug("%s: devid=%#x, glx=%#x, giov=%#x, gcr3_tbl=%#llx\n",
+		 __func__, devid, gcr3_info->glx, gcr3_info->giov,
+		 (unsigned long long)gcr3_info->gcr3_tbl);
+
+	tmp = gcr3_info->glx;
+	target->data[0] |= (tmp & DTE_GLX_MASK) << DTE_GLX_SHIFT;
+	if (gcr3_info->giov)
+		target->data[0] |= DTE_FLAG_GIOV;
+	target->data[0] |= DTE_FLAG_GV;
+
+	/* First mask out possible old values for GCR3 table */
+	tmp = DTE_GCR3_VAL_A(~0ULL) << DTE_GCR3_SHIFT_A;
+	target->data[0] &= ~tmp;
+	tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
+	target->data[1] &= ~tmp;
+	tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
+	target->data[1] &= ~tmp;
+
+	gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
+
+	/* Encode GCR3 table into DTE */
+	tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
+	target->data[0] |= tmp;
+	tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
+	target->data[1] |= tmp;
+	tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
+	target->data[1] |= tmp;
+	tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
+	target->data[1] |= tmp;
+
+	/* Use system default */
+	tmp = amd_iommu_gpt_level;
+
+	/* Mask out old values for GuestPagingMode */
+	target->data[2] &= ~(0x3ULL << DTE_GPT_LEVEL_SHIFT);
+	target->data[2] |= (tmp << DTE_GPT_LEVEL_SHIFT);
+}
+
 static void set_dte_entry(struct amd_iommu *iommu,
 			  struct iommu_dev_data *dev_data)
 {
-	u64 pte_root = 0;
-	u64 flags = 0;
-	u32 old_domid;
-	u16 devid = dev_data->devid;
 	u16 domid;
+	u16 devid = dev_data->devid;
 	struct protection_domain *domain = dev_data->domain;
+	struct dev_table_entry target = {.data = {0, 0, 0, 0}};
 	struct dev_table_entry *dev_table = get_dev_table(iommu);
-	struct gcr3_tbl_info *gcr3_info = &dev_data->gcr3_info;
+	u32 old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
 
 	if (domain_id_is_per_dev(domain))
 		domid = dev_data->domid;
 	else
 		domid = domain->id;
 
+	/*
+	 * Need to get the current value in dte[1,2] because they contain
+	 * interrupt-remapping settings, which has been programmed earlier.
+	 */
+	target.data[1] = dev_table[devid].data[1];
+	target.data[2] = dev_table[devid].data[2];
+
 	if (domain->iop.mode != PAGE_MODE_NONE)
-		pte_root = iommu_virt_to_phys(domain->iop.root);
+		target.data[0] = iommu_virt_to_phys(domain->iop.root);
 
-	pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
+	target.data[0] |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
 		    << DEV_ENTRY_MODE_SHIFT;
 
-	pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
+	target.data[0] |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V;
 
 	/*
 	 * When SNP is enabled, Only set TV bit when IOMMU
 	 * page translation is in use.
 	 */
 	if (!amd_iommu_snp_en || (domid != 0))
-		pte_root |= DTE_FLAG_TV;
-
-	flags = dev_table[devid].data[1];
+		target.data[0] |= DTE_FLAG_TV;
 
 	if (dev_data->ats_enabled)
-		flags |= DTE_FLAG_IOTLB;
+		target.data[1] |= DTE_FLAG_IOTLB;
 
 	if (dev_data->ppr)
-		pte_root |= 1ULL << DEV_ENTRY_PPR;
+		target.data[0] |= 1ULL << DEV_ENTRY_PPR;
 
 	if (domain->dirty_tracking)
-		pte_root |= DTE_FLAG_HAD;
-
-	if (gcr3_info && gcr3_info->gcr3_tbl) {
-		u64 gcr3 = iommu_virt_to_phys(gcr3_info->gcr3_tbl);
-		u64 glx  = gcr3_info->glx;
-		u64 tmp;
-
-		pte_root |= DTE_FLAG_GV;
-		pte_root |= (glx & DTE_GLX_MASK) << DTE_GLX_SHIFT;
-
-		/* First mask out possible old values for GCR3 table */
-		tmp = DTE_GCR3_VAL_B(~0ULL) << DTE_GCR3_SHIFT_B;
-		flags    &= ~tmp;
+		target.data[0] |= DTE_FLAG_HAD;
 
-		tmp = DTE_GCR3_VAL_C(~0ULL) << DTE_GCR3_SHIFT_C;
-		flags    &= ~tmp;
-
-		/* Encode GCR3 table into DTE */
-		tmp = DTE_GCR3_VAL_A(gcr3) << DTE_GCR3_SHIFT_A;
-		pte_root |= tmp;
-
-		tmp = DTE_GCR3_VAL_B(gcr3) << DTE_GCR3_SHIFT_B;
-		flags    |= tmp;
-
-		tmp = DTE_GCR3_VAL_C(gcr3) << DTE_GCR3_SHIFT_C;
-		flags    |= tmp;
-
-		if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
-			dev_table[devid].data[2] |=
-				((u64)GUEST_PGTABLE_5_LEVEL << DTE_GPT_LEVEL_SHIFT);
-		}
-
-		if (gcr3_info->giov)
-			pte_root |= DTE_FLAG_GIOV;
-	}
+	target.data[1] &= ~DEV_DOMID_MASK;
+	target.data[1] |= domid;
 
-	flags &= ~DEV_DOMID_MASK;
-	flags |= domid;
+	set_dte_gcr3_table(iommu, dev_data, &target);
 
-	old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
-	dev_table[devid].data[1]  = flags;
-	dev_table[devid].data[0]  = pte_root;
+	dev_table[devid].data[0] = target.data[0];
+	dev_table[devid].data[1] = target.data[1];
+	dev_table[devid].data[2] = target.data[2];
 
 	/*
 	 * A kdump kernel might be replacing a domain ID that was copied from
-- 
2.34.1
Re: [RFCv2 PATCH 2/7] iommu/amd: Refactor set_dte_entry
Posted by Jason Gunthorpe 1 year, 9 months ago
On Thu, Jan 11, 2024 at 06:06:41PM -0600, Suravee Suthikulpanit wrote:

> -	old_domid = dev_table[devid].data[1] & DEV_DOMID_MASK;
> -	dev_table[devid].data[1]  = flags;
> -	dev_table[devid].data[0]  = pte_root;
> +	dev_table[devid].data[0] = target.data[0];
> +	dev_table[devid].data[1] = target.data[1];
> +	dev_table[devid].data[2] = target.data[2];

Maybe you could have convinced me this doesn't have bugs today with 2
qword updates, but no way this is OK.

The multi qword update needs to be sequenced correctly so the HW
doesn't have tearing. The manual talks about this and it talks about
128 bit updates too.

This needs to follow the design ARM has where it uses the special
update logic to reliably sequence the change. Since nesting is making
the update more complex it becomes important.

Jason
RE: [RFCv2 PATCH 2/7] iommu/amd: Refactor set_dte_entry
Posted by Tian, Kevin 1 year, 10 months ago
> From: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
> Sent: Friday, January 12, 2024 8:07 AM
> 
> +
> +	/* Use system default */
> +	tmp = amd_iommu_gpt_level;
> +
> +	/* Mask out old values for GuestPagingMode */
> +	target->data[2] &= ~(0x3ULL << DTE_GPT_LEVEL_SHIFT);
> +	target->data[2] |= (tmp << DTE_GPT_LEVEL_SHIFT);

Just directly use amd_iommu_gpt_level here.

btw this sounds like a functional change as the original code only does
this for 5level:

		if (amd_iommu_gpt_level == PAGE_MODE_5_LEVEL) {
			dev_table[devid].data[2] |=
				((u64)GUEST_PGTABLE_5_LEVEL <<
DTE_GPT_LEVEL_SHIFT);
		}

If it's the desired change then better make it a separate fix.