[PATCH v10 5/7] iommu/vt-d: Make prq draining code generic

Jacob Pan posted 7 patches 2 years, 7 months ago
There is a newer version of this series
[PATCH v10 5/7] iommu/vt-d: Make prq draining code generic
Posted by Jacob Pan 2 years, 7 months ago
From: Lu Baolu <baolu.lu@linux.intel.com>

Currently draining page requests and responses for a pasid is part of SVA
implementation. This is because the driver only supports attaching an SVA
domain to a device pasid. As we are about to support attaching other types
of domains to a device pasid, the prq draining code becomes generic.

Reviewed-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
 drivers/iommu/intel/iommu.c | 30 +++++++++++++++++++-----------
 drivers/iommu/intel/iommu.h |  2 ++
 drivers/iommu/intel/svm.c   |  8 ++------
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 40685cbfaf0e..4b54a56831b4 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4733,21 +4733,29 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
 	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
 	struct iommu_domain *domain;
 
-	/* Domain type specific cleanup: */
 	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
-	if (domain) {
-		switch (domain->type) {
-		case IOMMU_DOMAIN_SVA:
-			intel_svm_remove_dev_pasid(dev, pasid);
-			break;
-		default:
-			/* should never reach here */
-			WARN_ON(1);
-			break;
-		}
+	if (!domain)
+		goto out_tear_down;
+
+	/*
+	 * The SVA implementation needs to handle its own stuffs like the mm
+	 * notification. Before consolidating that code into iommu core, let
+	 * the intel sva code handle it.
+	 */
+	if (domain->type == IOMMU_DOMAIN_SVA) {
+		intel_svm_remove_dev_pasid(dev, pasid);
+		goto out_tear_down;
 	}
 
+	/*
+	 * Should never reach here until we add support for attaching
+	 * non-SVA domain to a pasid.
+	 */
+	WARN_ON(1);
+
+out_tear_down:
 	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
+	intel_drain_pasid_prq(dev, pasid);
 }
 
 const struct iommu_ops intel_iommu_ops = {
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 1c5e1d88862b..6d94a29f5d52 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -844,6 +844,7 @@ int intel_svm_page_response(struct device *dev, struct iommu_fault_event *evt,
 			    struct iommu_page_response *msg);
 struct iommu_domain *intel_svm_domain_alloc(void);
 void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid);
+void intel_drain_pasid_prq(struct device *dev, u32 pasid);
 
 struct intel_svm_dev {
 	struct list_head list;
@@ -862,6 +863,7 @@ struct intel_svm {
 };
 #else
 static inline void intel_svm_check(struct intel_iommu *iommu) {}
+static inline void intel_drain_pasid_prq(struct device *dev, u32 pasid) {}
 static inline struct iommu_domain *intel_svm_domain_alloc(void)
 {
 	return NULL;
diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c
index 2a82864e9d57..588367a9e9b5 100644
--- a/drivers/iommu/intel/svm.c
+++ b/drivers/iommu/intel/svm.c
@@ -26,8 +26,6 @@
 #include "trace.h"
 
 static irqreturn_t prq_event_thread(int irq, void *d);
-static void intel_svm_drain_prq(struct device *dev, u32 pasid);
-#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)
 
 static DEFINE_XARRAY_ALLOC(pasid_private_array);
 static int pasid_private_add(ioasid_t pasid, void *priv)
@@ -391,8 +389,6 @@ void intel_svm_remove_dev_pasid(struct device *dev, u32 pasid)
 		 * large and has to be physically contiguous. So it's
 		 * hard to be as defensive as we might like.
 		 */
-		intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false);
-		intel_svm_drain_prq(dev, svm->pasid);
 		kfree_rcu(sdev, rcu);
 
 		if (list_empty(&svm->devs)) {
@@ -449,7 +445,7 @@ static bool is_canonical_address(u64 addr)
 }
 
 /**
- * intel_svm_drain_prq - Drain page requests and responses for a pasid
+ * intel_drain_pasid_prq - Drain page requests and responses for a pasid
  * @dev: target device
  * @pasid: pasid for draining
  *
@@ -463,7 +459,7 @@ static bool is_canonical_address(u64 addr)
  * described in VT-d spec CH7.10 to drain all page requests and page
  * responses pending in the hardware.
  */
-static void intel_svm_drain_prq(struct device *dev, u32 pasid)
+void intel_drain_pasid_prq(struct device *dev, u32 pasid)
 {
 	struct device_domain_info *info;
 	struct dmar_domain *domain;
-- 
2.25.1
RE: [PATCH v10 5/7] iommu/vt-d: Make prq draining code generic
Posted by Tian, Kevin 2 years, 7 months ago
> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> Sent: Thursday, July 13, 2023 12:34 AM
> 
> -	/* Domain type specific cleanup: */
>  	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
> -	if (domain) {
> -		switch (domain->type) {
> -		case IOMMU_DOMAIN_SVA:
> -			intel_svm_remove_dev_pasid(dev, pasid);
> -			break;
> -		default:
> -			/* should never reach here */
> -			WARN_ON(1);
> -			break;
> -		}
> +	if (!domain)
> +		goto out_tear_down;

WARN_ON()

>  		 * large and has to be physically contiguous. So it's
>  		 * hard to be as defensive as we might like.
>  		 */
> -		intel_pasid_tear_down_entry(iommu, dev, svm->pasid,
> false);
> -		intel_svm_drain_prq(dev, svm->pasid);

after removing the 2 lines the comment above becomes stale.

> -static void intel_svm_drain_prq(struct device *dev, u32 pasid)
> +void intel_drain_pasid_prq(struct device *dev, u32 pasid)
>  {
>  	struct device_domain_info *info;
>  	struct dmar_domain *domain;

later we should move generic prq handling logic out of svm.c into
a new prq.c
Re: [PATCH v10 5/7] iommu/vt-d: Make prq draining code generic
Posted by Baolu Lu 2 years, 6 months ago
On 2023/7/13 15:49, Tian, Kevin wrote:
>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>> Sent: Thursday, July 13, 2023 12:34 AM
>>
>> -	/* Domain type specific cleanup: */
>>   	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
>> -	if (domain) {
>> -		switch (domain->type) {
>> -		case IOMMU_DOMAIN_SVA:
>> -			intel_svm_remove_dev_pasid(dev, pasid);
>> -			break;
>> -		default:
>> -			/* should never reach here */
>> -			WARN_ON(1);
>> -			break;
>> -		}
>> +	if (!domain)
>> +		goto out_tear_down;
> 
> WARN_ON()

Why?

My understanding is that remove_device_pasid could be called in any context,
including when no domain is attached.

> 
>>   		 * large and has to be physically contiguous. So it's
>>   		 * hard to be as defensive as we might like.
>>   		 */
>> -		intel_pasid_tear_down_entry(iommu, dev, svm->pasid,
>> false);
>> -		intel_svm_drain_prq(dev, svm->pasid);
> 
> after removing the 2 lines the comment above becomes stale.

Yes.

> 
>> -static void intel_svm_drain_prq(struct device *dev, u32 pasid)
>> +void intel_drain_pasid_prq(struct device *dev, u32 pasid)
>>   {
>>   	struct device_domain_info *info;
>>   	struct dmar_domain *domain;
> 
> later we should move generic prq handling logic out of svm.c into
> a new prq.c

Yes.

Best regards,
baolu
RE: [PATCH v10 5/7] iommu/vt-d: Make prq draining code generic
Posted by Tian, Kevin 2 years, 6 months ago
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Friday, July 14, 2023 11:28 AM
> 
> On 2023/7/13 15:49, Tian, Kevin wrote:
> >> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> >> Sent: Thursday, July 13, 2023 12:34 AM
> >>
> >> -	/* Domain type specific cleanup: */
> >>   	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
> >> -	if (domain) {
> >> -		switch (domain->type) {
> >> -		case IOMMU_DOMAIN_SVA:
> >> -			intel_svm_remove_dev_pasid(dev, pasid);
> >> -			break;
> >> -		default:
> >> -			/* should never reach here */
> >> -			WARN_ON(1);
> >> -			break;
> >> -		}
> >> +	if (!domain)
> >> +		goto out_tear_down;
> >
> > WARN_ON()
> 
> Why?
> 
> My understanding is that remve_device_pasid could be call in any context
> including no domain attached.
> 

Oh, I wasn't aware of that. Can you elaborate on the usage that uses a pasid
w/o a domain? A pasid needs to point to a page table. Presumably every
page table should be wrapped by an iommu domain...
Re: [PATCH v10 5/7] iommu/vt-d: Make prq draining code generic
Posted by Baolu Lu 2 years, 6 months ago
On 2023/7/14 11:49, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Friday, July 14, 2023 11:28 AM
>>
>> On 2023/7/13 15:49, Tian, Kevin wrote:
>>>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>>> Sent: Thursday, July 13, 2023 12:34 AM
>>>>
>>>> -	/* Domain type specific cleanup: */
>>>>    	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
>>>> -	if (domain) {
>>>> -		switch (domain->type) {
>>>> -		case IOMMU_DOMAIN_SVA:
>>>> -			intel_svm_remove_dev_pasid(dev, pasid);
>>>> -			break;
>>>> -		default:
>>>> -			/* should never reach here */
>>>> -			WARN_ON(1);
>>>> -			break;
>>>> -		}
>>>> +	if (!domain)
>>>> +		goto out_tear_down;
>>>
>>> WARN_ON()
>>
>> Why?
>>
>> My understanding is that remve_device_pasid could be call in any context
>> including no domain attached.
>>
> 
> oh I'm not aware of that. Can you elaborate the usage which uses a pasid
> w/o domain? pasid needs to point to a page table. Presumably every
> page table should be wrapped by a iommu domain...

A case I can think of is error rewinding. A domain is being attached to
multiple pasids. When one of them fails, remove_device_pasid should
be called on all pasids so that they are parked in a determinate state.

On the other hand, I don't want the remove_device_pasid to be the
counterpart of attach_dev_pasid. remove_device_pasid simply denotes:

- The pasid will be parked in blocking state;
- If any domain has been attached to this pasid, stop referencing
   it. Otherwise, there might be use-after-free issues.

Hence, remove_device_pasid should never fail.

Best regards,
baolu
RE: [PATCH v10 5/7] iommu/vt-d: Make prq draining code generic
Posted by Tian, Kevin 2 years, 6 months ago
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Friday, July 14, 2023 1:47 PM
> 
> On 2023/7/14 11:49, Tian, Kevin wrote:
> >> From: Baolu Lu <baolu.lu@linux.intel.com>
> >> Sent: Friday, July 14, 2023 11:28 AM
> >>
> >> On 2023/7/13 15:49, Tian, Kevin wrote:
> >>>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
> >>>> Sent: Thursday, July 13, 2023 12:34 AM
> >>>>
> >>>> -	/* Domain type specific cleanup: */
> >>>>    	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
> >>>> -	if (domain) {
> >>>> -		switch (domain->type) {
> >>>> -		case IOMMU_DOMAIN_SVA:
> >>>> -			intel_svm_remove_dev_pasid(dev, pasid);
> >>>> -			break;
> >>>> -		default:
> >>>> -			/* should never reach here */
> >>>> -			WARN_ON(1);
> >>>> -			break;
> >>>> -		}
> >>>> +	if (!domain)
> >>>> +		goto out_tear_down;
> >>>
> >>> WARN_ON()
> >>
> >> Why?
> >>
> >> My understanding is that remve_device_pasid could be call in any context
> >> including no domain attached.
> >>
> >
> > oh I'm not aware of that. Can you elaborate the usage which uses a pasid
> > w/o domain? pasid needs to point to a page table. Presumably every
> > page table should be wrapped by a iommu domain...
> 
> A case I can think of is error rewinding. A domain is being attached to
> multiple pasids. When one of them is failed, remove_device_pasid should
> be called on all pasids so that they are parked at a determinant state.

Can you elaborate what is the association among those pasid's so failing
one would lead to failing all?

Just like a domain can be attached to multiple devices. I don't think there
is an unwinding policy forcing to detach all devices just because there is
a failure attaching the domain to a new one.

> 
> On the other hand, I don't want the remove_device_pasid to be the
> counterpart of attach_dev_pasid. remove_device_pasid simply denotes:
> 
> - The pasid will be parked in blocking state;
> - If any domain that has been attached to this pasid, stop reference to
>    it any more. Otherwise, there might be use-after-free issues.
> 
> Hence, remove_device_pasid should never fail.
> 

It should never fail. But could warn if there is a condition which shouldn't
be hit. 😊
Re: [PATCH v10 5/7] iommu/vt-d: Make prq draining code generic
Posted by Baolu Lu 2 years, 6 months ago
On 2023/7/19 13:39, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Friday, July 14, 2023 1:47 PM
>>
>> On 2023/7/14 11:49, Tian, Kevin wrote:
>>>> From: Baolu Lu <baolu.lu@linux.intel.com>
>>>> Sent: Friday, July 14, 2023 11:28 AM
>>>>
>>>> On 2023/7/13 15:49, Tian, Kevin wrote:
>>>>>> From: Jacob Pan <jacob.jun.pan@linux.intel.com>
>>>>>> Sent: Thursday, July 13, 2023 12:34 AM
>>>>>>
>>>>>> -	/* Domain type specific cleanup: */
>>>>>>     	domain = iommu_get_domain_for_dev_pasid(dev, pasid, 0);
>>>>>> -	if (domain) {
>>>>>> -		switch (domain->type) {
>>>>>> -		case IOMMU_DOMAIN_SVA:
>>>>>> -			intel_svm_remove_dev_pasid(dev, pasid);
>>>>>> -			break;
>>>>>> -		default:
>>>>>> -			/* should never reach here */
>>>>>> -			WARN_ON(1);
>>>>>> -			break;
>>>>>> -		}
>>>>>> +	if (!domain)
>>>>>> +		goto out_tear_down;
>>>>>
>>>>> WARN_ON()
>>>>
>>>> Why?
>>>>
>>>> My understanding is that remve_device_pasid could be call in any context
>>>> including no domain attached.
>>>>
>>>
>>> oh I'm not aware of that. Can you elaborate the usage which uses a pasid
>>> w/o domain? pasid needs to point to a page table. Presumably every
>>> page table should be wrapped by a iommu domain...
>>
>> A case I can think of is error rewinding. A domain is being attached to
>> multiple pasids. When one of them is failed, remove_device_pasid should
>> be called on all pasids so that they are parked at a determinant state.
> 
> Can you elaborate what is the association among those pasid's so failing
> one would lead to failing all?
> 
> Just like a domain can be attached to multiple devices. I don't think there
> is an unwinding policy forcing to detach all devices just because there is
> a failure attaching the domain to a new one.

It's fine to add a check here if there's no real case. I was just
thinking that remove_device_pasid could also be invoked when there's no
domain attached.

>>
>> On the other hand, I don't want the remove_device_pasid to be the
>> counterpart of attach_dev_pasid. remove_device_pasid simply denotes:
>>
>> - The pasid will be parked in blocking state;
>> - If any domain that has been attached to this pasid, stop reference to
>>     it any more. Otherwise, there might be use-after-free issues.
>>
>> Hence, remove_device_pasid should never fail.
>>
> 
> It should never fail. But could warn if there is a condition which shouldn't
> be hit. 😊

Okay, let's add a check here. And we can loosen it later if any real use
case comes up.

Best regards,
baolu