[PATCH 3/7] iommu/vt-d: Enhance compatibility check for paging domain attach

Lu Baolu posted 7 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH 3/7] iommu/vt-d: Enhance compatibility check for paging domain attach
Posted by Lu Baolu 1 month, 2 weeks ago
The driver now supports domain_alloc_paging, ensuring that a valid device
pointer is provided whenever a paging domain is allocated. Additionally,
the dmar_domain attributes are set up at the time of allocation.

Consistent with the established semantics in the IOMMU core, if a domain is
attached to a device and found to be incompatible with the IOMMU hardware
capabilities, the operation will return an -EINVAL error. This implicitly
advises the caller to allocate a new domain for the device and attempt the
domain attachment again.

Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/iommu.c | 70 ++++++++++++-------------------------
 drivers/iommu/intel/pasid.c | 28 +--------------
 2 files changed, 24 insertions(+), 74 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index dd158ff5fd45..f6a3266b17d4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -1606,7 +1606,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 	int translation = CONTEXT_TT_MULTI_LEVEL;
 	struct dma_pte *pgd = domain->pgd;
 	struct context_entry *context;
-	int agaw, ret;
+	int ret;
 
 	pr_debug("Set context mapping for %02x:%02x.%d\n",
 		bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
@@ -1623,27 +1623,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
 
 	copied_context_tear_down(iommu, context, bus, devfn);
 	context_clear_entry(context);
-
 	context_set_domain_id(context, did);
 
-	/*
-	 * Skip top levels of page tables for iommu which has
-	 * less agaw than default. Unnecessary for PT mode.
-	 */
-	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
-		ret = -ENOMEM;
-		pgd = phys_to_virt(dma_pte_addr(pgd));
-		if (!dma_pte_present(pgd))
-			goto out_unlock;
-	}
-
 	if (info && info->ats_supported)
 		translation = CONTEXT_TT_DEV_IOTLB;
 	else
 		translation = CONTEXT_TT_MULTI_LEVEL;
 
 	context_set_address_root(context, virt_to_phys(pgd));
-	context_set_address_width(context, agaw);
+	context_set_address_width(context, domain->agaw);
 	context_set_translation_type(context, translation);
 	context_set_fault_enable(context);
 	context_set_present(context);
@@ -1876,20 +1864,9 @@ static int domain_setup_first_level(struct intel_iommu *iommu,
 				    u32 pasid)
 {
 	struct dma_pte *pgd = domain->pgd;
-	int agaw, level;
-	int flags = 0;
+	int level, flags = 0;
 
-	/*
-	 * Skip top levels of page tables for iommu which has
-	 * less agaw than default. Unnecessary for PT mode.
-	 */
-	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
-		pgd = phys_to_virt(dma_pte_addr(pgd));
-		if (!dma_pte_present(pgd))
-			return -ENOMEM;
-	}
-
-	level = agaw_to_level(agaw);
+	level = agaw_to_level(domain->agaw);
 	if (level != 4 && level != 5)
 		return -EINVAL;
 
@@ -3506,27 +3483,26 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
 	if (domain->dirty_ops && !ssads_supported(iommu))
 		return -EINVAL;
 
-	/* check if this iommu agaw is sufficient for max mapped address */
-	addr_width = agaw_to_width(iommu->agaw);
-	if (addr_width > cap_mgaw(iommu->cap))
-		addr_width = cap_mgaw(iommu->cap);
-
-	if (dmar_domain->max_addr > (1LL << addr_width))
+	if (dmar_domain->iommu_coherency !=
+			iommu_paging_structure_coherency(iommu))
 		return -EINVAL;
-	dmar_domain->gaw = addr_width;
-
-	/*
-	 * Knock out extra levels of page tables if necessary
-	 */
-	while (iommu->agaw < dmar_domain->agaw) {
-		struct dma_pte *pte;
-
-		pte = dmar_domain->pgd;
-		if (dma_pte_present(pte)) {
-			dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
-			iommu_free_page(pte);
-		}
-		dmar_domain->agaw--;
+
+	if (domain->type & __IOMMU_DOMAIN_PAGING) {
+		if (dmar_domain->iommu_superpage !=
+				iommu_superpage_capability(iommu, dmar_domain->use_first_level))
+			return -EINVAL;
+
+		if (dmar_domain->use_first_level &&
+		    (!sm_supported(iommu) || !ecap_flts(iommu->ecap)))
+			return -EINVAL;
+
+		/* check if this iommu agaw is sufficient for max mapped address */
+		addr_width = agaw_to_width(iommu->agaw);
+		if (addr_width > cap_mgaw(iommu->cap))
+			addr_width = cap_mgaw(iommu->cap);
+
+		if (dmar_domain->gaw > addr_width || dmar_domain->agaw > iommu->agaw)
+			return -EINVAL;
 	}
 
 	if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev) &&
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 2e5fa0a23299..53157e1194f4 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -345,25 +345,6 @@ int intel_pasid_setup_first_level(struct intel_iommu *iommu,
 	return 0;
 }
 
-/*
- * Skip top levels of page tables for iommu which has less agaw
- * than default. Unnecessary for PT mode.
- */
-static int iommu_skip_agaw(struct dmar_domain *domain,
-			   struct intel_iommu *iommu,
-			   struct dma_pte **pgd)
-{
-	int agaw;
-
-	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
-		*pgd = phys_to_virt(dma_pte_addr(*pgd));
-		if (!dma_pte_present(*pgd))
-			return -EINVAL;
-	}
-
-	return agaw;
-}
-
 /*
  * Set up the scalable mode pasid entry for second only translation type.
  */
@@ -374,7 +355,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 	struct pasid_entry *pte;
 	struct dma_pte *pgd;
 	u64 pgd_val;
-	int agaw;
 	u16 did;
 
 	/*
@@ -388,12 +368,6 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 	}
 
 	pgd = domain->pgd;
-	agaw = iommu_skip_agaw(domain, iommu, &pgd);
-	if (agaw < 0) {
-		dev_err(dev, "Invalid domain page table\n");
-		return -EINVAL;
-	}
-
 	pgd_val = virt_to_phys(pgd);
 	did = domain_id_iommu(domain, iommu);
 
@@ -412,7 +386,7 @@ int intel_pasid_setup_second_level(struct intel_iommu *iommu,
 	pasid_clear_entry(pte);
 	pasid_set_domain_id(pte, did);
 	pasid_set_slptr(pte, pgd_val);
-	pasid_set_address_width(pte, agaw);
+	pasid_set_address_width(pte, domain->agaw);
 	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
 	pasid_set_fault_enable(pte);
 	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));
-- 
2.43.0
Re: [PATCH 3/7] iommu/vt-d: Enhance compatibility check for paging domain attach
Posted by Jason Gunthorpe 1 month, 2 weeks ago
On Fri, Oct 11, 2024 at 12:27:18PM +0800, Lu Baolu wrote:

> @@ -1623,27 +1623,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
>  
>  	copied_context_tear_down(iommu, context, bus, devfn);
>  	context_clear_entry(context);
> -
>  	context_set_domain_id(context, did);
>  
> -	/*
> -	 * Skip top levels of page tables for iommu which has
> -	 * less agaw than default. Unnecessary for PT mode.
> -	 */
> -	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
> -		ret = -ENOMEM;
> -		pgd = phys_to_virt(dma_pte_addr(pgd));
> -		if (!dma_pte_present(pgd))
> -			goto out_unlock;
> -	}

Yikes, this is nasty racy stuff, glad to see it go

But should the agaw stuff be in its own patch?

> @@ -3506,27 +3483,26 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
>  	if (domain->dirty_ops && !ssads_supported(iommu))
>  		return -EINVAL;
>  
> -	/* check if this iommu agaw is sufficient for max mapped address */
> -	addr_width = agaw_to_width(iommu->agaw);
> -	if (addr_width > cap_mgaw(iommu->cap))
> -		addr_width = cap_mgaw(iommu->cap);
> -
> -	if (dmar_domain->max_addr > (1LL << addr_width))
> +	if (dmar_domain->iommu_coherency !=
> +			iommu_paging_structure_coherency(iommu))
>  		return -EINVAL;
> -	dmar_domain->gaw = addr_width;
> -
> -	/*
> -	 * Knock out extra levels of page tables if necessary
> -	 */
> -	while (iommu->agaw < dmar_domain->agaw) {
> -		struct dma_pte *pte;
> -
> -		pte = dmar_domain->pgd;
> -		if (dma_pte_present(pte)) {
> -			dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
> -			iommu_free_page(pte);
> -		}
> -		dmar_domain->agaw--;
> +
> +	if (domain->type & __IOMMU_DOMAIN_PAGING) {

It looks like this entire function is already never called for
anything but paging?

The only three callers are:

	.default_domain_ops = &(const struct iommu_domain_ops) {
		.attach_dev		= intel_iommu_attach_device,
		.set_dev_pasid		= intel_iommu_set_dev_pasid,

and

static const struct iommu_domain_ops intel_nested_domain_ops = {
	.attach_dev		= intel_nested_attach_dev,

And none of those cases can be anything except a paging domain by
definition.

So this if should go away, or be turned into a WARN_ON.

Jason
Re: [PATCH 3/7] iommu/vt-d: Enhance compatibility check for paging domain attach
Posted by Baolu Lu 1 month, 2 weeks ago
On 2024/10/12 0:27, Jason Gunthorpe wrote:
> On Fri, Oct 11, 2024 at 12:27:18PM +0800, Lu Baolu wrote:
> 
>> @@ -1623,27 +1623,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain,
>>   
>>   	copied_context_tear_down(iommu, context, bus, devfn);
>>   	context_clear_entry(context);
>> -
>>   	context_set_domain_id(context, did);
>>   
>> -	/*
>> -	 * Skip top levels of page tables for iommu which has
>> -	 * less agaw than default. Unnecessary for PT mode.
>> -	 */
>> -	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
>> -		ret = -ENOMEM;
>> -		pgd = phys_to_virt(dma_pte_addr(pgd));
>> -		if (!dma_pte_present(pgd))
>> -			goto out_unlock;
>> -	}
> 
> Yikes, this is nasty racy stuff, glad to see it go
> 
> But should the agaw stuff be in its own patch?

The agaw stuff is now in the paging domain allocation path.

'agaw' represents the levels that the page table could support. With the
domain allocation callbacks always have a valid device pointer, the page
table levels of a domain could be determined during allocation.

When the domain is attached to a new device, the domain's levels will be
checked again the iommu capabilities of the device. If the iommu is not
capable of supporting the page levels, -EINVAL (not compatible) will be
returned, suggesting that the caller should use a new domain for the
device.

> 
>> @@ -3506,27 +3483,26 @@ int prepare_domain_attach_device(struct iommu_domain *domain,
>>   	if (domain->dirty_ops && !ssads_supported(iommu))
>>   		return -EINVAL;
>>   
>> -	/* check if this iommu agaw is sufficient for max mapped address */
>> -	addr_width = agaw_to_width(iommu->agaw);
>> -	if (addr_width > cap_mgaw(iommu->cap))
>> -		addr_width = cap_mgaw(iommu->cap);
>> -
>> -	if (dmar_domain->max_addr > (1LL << addr_width))
>> +	if (dmar_domain->iommu_coherency !=
>> +			iommu_paging_structure_coherency(iommu))
>>   		return -EINVAL;
>> -	dmar_domain->gaw = addr_width;
>> -
>> -	/*
>> -	 * Knock out extra levels of page tables if necessary
>> -	 */
>> -	while (iommu->agaw < dmar_domain->agaw) {
>> -		struct dma_pte *pte;
>> -
>> -		pte = dmar_domain->pgd;
>> -		if (dma_pte_present(pte)) {
>> -			dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte));
>> -			iommu_free_page(pte);
>> -		}
>> -		dmar_domain->agaw--;
>> +
>> +	if (domain->type & __IOMMU_DOMAIN_PAGING) {
> 
> It looks like this entire function is already never called for
> anything but paging?
> 
> The only three callers are:
> 
> 	.default_domain_ops = &(const struct iommu_domain_ops) {
> 		.attach_dev		= intel_iommu_attach_device,
> 		.set_dev_pasid		= intel_iommu_set_dev_pasid,
> 
> and
> 
> static const struct iommu_domain_ops intel_nested_domain_ops = {
> 	.attach_dev		= intel_nested_attach_dev,
> 
> And none of those cases can be anything except a paging domain by
> definition.

A nested domain is not a paging domain. It represents a user-space page
table that nested on a parent paging domain. Perhaps I overlooked
anything?

> 
> So this if should go away, or be turned into a WARN_ON.
> 
> Jason

Thanks,
baolu
Re: [PATCH 3/7] iommu/vt-d: Enhance compatibility check for paging domain attach
Posted by Jason Gunthorpe 1 month, 1 week ago
On Mon, Oct 14, 2024 at 09:25:03AM +0800, Baolu Lu wrote:
> > > +	if (domain->type & __IOMMU_DOMAIN_PAGING) {
> > 
> > It looks like this entire function is already never called for
> > anything but paging?
> > 
> > The only three callers are:
> > 
> > 	.default_domain_ops = &(const struct iommu_domain_ops) {
> > 		.attach_dev		= intel_iommu_attach_device,
> > 		.set_dev_pasid		= intel_iommu_set_dev_pasid,
> > 
> > and
> > 
> > static const struct iommu_domain_ops intel_nested_domain_ops = {
> > 	.attach_dev		= intel_nested_attach_dev,
> > 
> > And none of those cases can be anything except a paging domain by
> > definition.
> 
> A nested domain is not a paging domain. It represents a user-space page
> table that nested on a parent paging domain. Perhaps I overlooked
> anything?

It only calls it on the s2_parent which is always a paging domain?

	ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);

Jason
Re: [PATCH 3/7] iommu/vt-d: Enhance compatibility check for paging domain attach
Posted by Baolu Lu 1 month, 1 week ago
On 2024/10/15 3:24, Jason Gunthorpe wrote:
> On Mon, Oct 14, 2024 at 09:25:03AM +0800, Baolu Lu wrote:
>>>> +	if (domain->type & __IOMMU_DOMAIN_PAGING) {
>>> It looks like this entire function is already never called for
>>> anything but paging?
>>>
>>> The only three callers are:
>>>
>>> 	.default_domain_ops = &(const struct iommu_domain_ops) {
>>> 		.attach_dev		= intel_iommu_attach_device,
>>> 		.set_dev_pasid		= intel_iommu_set_dev_pasid,
>>>
>>> and
>>>
>>> static const struct iommu_domain_ops intel_nested_domain_ops = {
>>> 	.attach_dev		= intel_nested_attach_dev,
>>>
>>> And none of those cases can be anything except a paging domain by
>>> definition.
>> A nested domain is not a paging domain. It represents a user-space page
>> table that nested on a parent paging domain. Perhaps I overlooked
>> anything?
> It only calls it on the s2_parent which is always a paging domain?
> 
> 	ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);

Yea, you are right. I overlooked that part.  I'll remove the 'if'
statement and utilize a WARN_ON() function instead.

And also, I will rename this function with a meaningful name,some like
paging_domain_is_compatible()?

Thanks,
baolu
Re: [PATCH 3/7] iommu/vt-d: Enhance compatibility check for paging domain attach
Posted by Jason Gunthorpe 1 month, 1 week ago
On Tue, Oct 15, 2024 at 10:52:19AM +0800, Baolu Lu wrote:
> On 2024/10/15 3:24, Jason Gunthorpe wrote:
> > On Mon, Oct 14, 2024 at 09:25:03AM +0800, Baolu Lu wrote:
> > > > > +	if (domain->type & __IOMMU_DOMAIN_PAGING) {
> > > > It looks like this entire function is already never called for
> > > > anything but paging?
> > > > 
> > > > The only three callers are:
> > > > 
> > > > 	.default_domain_ops = &(const struct iommu_domain_ops) {
> > > > 		.attach_dev		= intel_iommu_attach_device,
> > > > 		.set_dev_pasid		= intel_iommu_set_dev_pasid,
> > > > 
> > > > and
> > > > 
> > > > static const struct iommu_domain_ops intel_nested_domain_ops = {
> > > > 	.attach_dev		= intel_nested_attach_dev,
> > > > 
> > > > And none of those cases can be anything except a paging domain by
> > > > definition.
> > > A nested domain is not a paging domain. It represents a user-space page
> > > table that nested on a parent paging domain. Perhaps I overlooked
> > > anything?
> > It only calls it on the s2_parent which is always a paging domain?
> > 
> > 	ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);
> 
> Yea, you are right. I overlooked that part.  I'll remove the 'if'
> statement and utilize a WARN_ON() function instead.
> 
> And also, I will rename this function with a meaningful name,some like
> paging_domain_is_compatible()?

That sounds good too

Ultimately you want to try to structure the driver so that there is a
struct paging_domain that is always the paging domain type and
everything is easy to understand. Don't re-use the same struct for
identity/blocked/nested domains.

Jason
Re: [PATCH 3/7] iommu/vt-d: Enhance compatibility check for paging domain attach
Posted by Baolu Lu 1 month, 1 week ago
On 2024/10/15 20:48, Jason Gunthorpe wrote:
> On Tue, Oct 15, 2024 at 10:52:19AM +0800, Baolu Lu wrote:
>> On 2024/10/15 3:24, Jason Gunthorpe wrote:
>>> On Mon, Oct 14, 2024 at 09:25:03AM +0800, Baolu Lu wrote:
>>>>>> +	if (domain->type & __IOMMU_DOMAIN_PAGING) {
>>>>> It looks like this entire function is already never called for
>>>>> anything but paging?
>>>>>
>>>>> The only three callers are:
>>>>>
>>>>> 	.default_domain_ops = &(const struct iommu_domain_ops) {
>>>>> 		.attach_dev		= intel_iommu_attach_device,
>>>>> 		.set_dev_pasid		= intel_iommu_set_dev_pasid,
>>>>>
>>>>> and
>>>>>
>>>>> static const struct iommu_domain_ops intel_nested_domain_ops = {
>>>>> 	.attach_dev		= intel_nested_attach_dev,
>>>>>
>>>>> And none of those cases can be anything except a paging domain by
>>>>> definition.
>>>> A nested domain is not a paging domain. It represents a user-space page
>>>> table that nested on a parent paging domain. Perhaps I overlooked
>>>> anything?
>>> It only calls it on the s2_parent which is always a paging domain?
>>>
>>> 	ret = prepare_domain_attach_device(&dmar_domain->s2_domain->domain, dev);
>> Yea, you are right. I overlooked that part.  I'll remove the 'if'
>> statement and utilize a WARN_ON() function instead.
>>
>> And also, I will rename this function with a meaningful name,some like
>> paging_domain_is_compatible()?
> That sounds good too
> 
> Ultimately you want to try to structure the driver so that there is a
> struct paging_domain that is always the paging domain type and
> everything is easy to understand. Don't re-use the same struct for
> identity/blocked/nested domains.

Yes, agreed!

Thanks,
baolu