The fault->mutex was meant to serialize the fault read()/write() fops and
iommufd_auto_response_faults(). It was also conveniently used to protect
fault->deliver in poll() and iommufd_fault_iopf_handler().
However, copy_from/to_user() may sleep if page faults are enabled. Thus,
they could take a long time waiting for user pages to swap in, blocking
iommufd_fault_iopf_handler() and its caller, which is typically a shared
IRQ handler of an IOMMU driver, resulting in a potential global DoS.
Instead of reusing the mutex to protect the fault->deliver list, add a
separate spinlock to do the job, so that iommufd_fault_iopf_handler()
will no longer be blocked by copy_from/to_user().
Provide two list manipulation helpers for fault->deliver:
- Extract the first iopf_group out of the fault->deliver list
- Restore an iopf_group back to the head of the fault->deliver list
Replace the list_first_entry() and list_for_each_entry_safe() calls accordingly.
Fixes: 07838f7fd529 ("iommufd: Add iommufd fault object")
Cc: stable@vger.kernel.org
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/iommufd/iommufd_private.h | 26 +++++++++++++++
drivers/iommu/iommufd/fault.c | 43 ++++++++++++++-----------
2 files changed, 50 insertions(+), 19 deletions(-)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index b6d706cf2c66..d3097c857abf 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -445,12 +445,38 @@ struct iommufd_fault {
/* The lists of outstanding faults protected by below mutex. */
struct mutex mutex;
+ spinlock_t lock; /* protects the deliver list */
struct list_head deliver;
struct xarray response;
struct wait_queue_head wait_queue;
};
+/* Extract the first node out of the fault->deliver list */
+static inline struct iopf_group *
+iommufd_fault_deliver_extract(struct iommufd_fault *fault)
+{
+ struct list_head *list = &fault->deliver;
+ struct iopf_group *group = NULL;
+
+ spin_lock(&fault->lock);
+ if (!list_empty(list)) {
+ group = list_first_entry(list, struct iopf_group, node);
+ list_del(&group->node);
+ }
+ spin_unlock(&fault->lock);
+ return group;
+}
+
+/* Restore a node back to the head in fault->deliver */
+static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
+ struct iopf_group *group)
+{
+ spin_lock(&fault->lock);
+ list_add(&fault->deliver, &group->node);
+ spin_unlock(&fault->lock);
+}
+
struct iommufd_attach_handle {
struct iommu_attach_handle handle;
struct iommufd_device *idev;
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
index 685510224d05..fa69240daa28 100644
--- a/drivers/iommu/iommufd/fault.c
+++ b/drivers/iommu/iommufd/fault.c
@@ -102,17 +102,19 @@ static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
struct iommufd_attach_handle *handle)
{
struct iommufd_fault *fault = hwpt->fault;
- struct iopf_group *group, *next;
+ struct iopf_group *group;
unsigned long index;
if (!fault)
return;
mutex_lock(&fault->mutex);
- list_for_each_entry_safe(group, next, &fault->deliver, node) {
- if (group->attach_handle != &handle->handle)
+ for (group = iommufd_fault_deliver_extract(fault); group;
+ group = iommufd_fault_deliver_extract(fault)) {
+ if (group->attach_handle != &handle->handle) {
+ iommufd_fault_deliver_restore(fault, group);
continue;
- list_del(&group->node);
+ }
iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
iopf_free_group(group);
}
@@ -212,7 +214,7 @@ int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
void iommufd_fault_destroy(struct iommufd_object *obj)
{
struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
- struct iopf_group *group, *next;
+ struct iopf_group *group;
unsigned long index;
/*
@@ -221,8 +223,8 @@ void iommufd_fault_destroy(struct iommufd_object *obj)
* accessing this pointer. Therefore, acquiring the mutex here
* is unnecessary.
*/
- list_for_each_entry_safe(group, next, &fault->deliver, node) {
- list_del(&group->node);
+ for (group = iommufd_fault_deliver_extract(fault); group;
+ group = iommufd_fault_deliver_extract(fault)) {
iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
iopf_free_group(group);
}
@@ -266,17 +268,20 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
return -ESPIPE;
mutex_lock(&fault->mutex);
- while (!list_empty(&fault->deliver) && count > done) {
- group = list_first_entry(&fault->deliver,
- struct iopf_group, node);
-
- if (group->fault_count * fault_size > count - done)
+ for (group = iommufd_fault_deliver_extract(fault); group;
+ group = iommufd_fault_deliver_extract(fault)) {
+ if (done >= count ||
+ group->fault_count * fault_size > count - done) {
+ iommufd_fault_deliver_restore(fault, group);
break;
+ }
rc = xa_alloc(&fault->response, &group->cookie, group,
xa_limit_32b, GFP_KERNEL);
- if (rc)
+ if (rc) {
+ iommufd_fault_deliver_restore(fault, group);
break;
+ }
idev = to_iommufd_handle(group->attach_handle)->idev;
list_for_each_entry(iopf, &group->faults, list) {
@@ -284,14 +289,13 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
&data, idev,
group->cookie);
if (copy_to_user(buf + done, &data, fault_size)) {
+ iommufd_fault_deliver_restore(fault, group);
xa_erase(&fault->response, group->cookie);
rc = -EFAULT;
break;
}
done += fault_size;
}
-
- list_del(&group->node);
}
mutex_unlock(&fault->mutex);
@@ -349,10 +353,10 @@ static __poll_t iommufd_fault_fops_poll(struct file *filep,
__poll_t pollflags = EPOLLOUT;
poll_wait(filep, &fault->wait_queue, wait);
- mutex_lock(&fault->mutex);
+ spin_lock(&fault->lock);
if (!list_empty(&fault->deliver))
pollflags |= EPOLLIN | EPOLLRDNORM;
- mutex_unlock(&fault->mutex);
+ spin_unlock(&fault->lock);
return pollflags;
}
@@ -394,6 +398,7 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
INIT_LIST_HEAD(&fault->deliver);
xa_init_flags(&fault->response, XA_FLAGS_ALLOC1);
mutex_init(&fault->mutex);
+ spin_lock_init(&fault->lock);
init_waitqueue_head(&fault->wait_queue);
filep = anon_inode_getfile("[iommufd-pgfault]", &iommufd_fault_fops,
@@ -442,9 +447,9 @@ int iommufd_fault_iopf_handler(struct iopf_group *group)
hwpt = group->attach_handle->domain->fault_data;
fault = hwpt->fault;
- mutex_lock(&fault->mutex);
+ spin_lock(&fault->lock);
list_add_tail(&group->node, &fault->deliver);
- mutex_unlock(&fault->mutex);
+ spin_unlock(&fault->lock);
wake_up_interruptible(&fault->wait_queue);
--
2.43.0
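For context, the locking pattern the patch moves to can be distilled as
follows (a sketch based on the diff above; error handling and the
surrounding fops plumbing are omitted):

	/*
	 * Producer, called from the IOMMU driver's IRQ path: the
	 * critical section is short and never sleeps, so a spinlock
	 * is safe here.
	 */
	spin_lock(&fault->lock);
	list_add_tail(&group->node, &fault->deliver);
	spin_unlock(&fault->lock);
	wake_up_interruptible(&fault->wait_queue);

	/*
	 * Consumer, in the read() fop: take a group off the list under
	 * the spinlock, then do the sleepable copy_to_user() with no
	 * spinlock held; on failure, put the group back at the head of
	 * the list so it is not lost.
	 */
	group = iommufd_fault_deliver_extract(fault);
	if (group && copy_to_user(buf + done, &data, fault_size)) {
		iommufd_fault_deliver_restore(fault, group);
		rc = -EFAULT;
	}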
On 1/15/25 07:28, Nicolin Chen wrote:
> The fault->mutex was meant to serialize the fault read()/write() fops and
> iommufd_auto_response_faults(). It was also conveniently used to protect
> fault->deliver in poll() and iommufd_fault_iopf_handler().
>
> However, copy_from/to_user() may sleep if page faults are enabled. Thus,
> they could take a long time waiting for user pages to swap in, blocking
> iommufd_fault_iopf_handler() and its caller, which is typically a shared
> IRQ handler of an IOMMU driver, resulting in a potential global DoS.
>
> Instead of reusing the mutex to protect the fault->deliver list, add a
> separate spinlock to do the job, so that iommufd_fault_iopf_handler()
> will no longer be blocked by copy_from/to_user().
>
> Provide two list manipulation helpers for fault->deliver:
> - Extract the first iopf_group out of the fault->deliver list
> - Restore an iopf_group back to the head of the fault->deliver list
>
> Replace the list_first_entry() and list_for_each_entry_safe() calls accordingly.
>
> Fixes: 07838f7fd529 ("iommufd: Add iommufd fault object")
> Cc: stable@vger.kernel.org
> Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
> Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
> ---
> drivers/iommu/iommufd/iommufd_private.h | 26 +++++++++++++++
> drivers/iommu/iommufd/fault.c | 43 ++++++++++++++-----------
> 2 files changed, 50 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
> index b6d706cf2c66..d3097c857abf 100644
> --- a/drivers/iommu/iommufd/iommufd_private.h
> +++ b/drivers/iommu/iommufd/iommufd_private.h
> @@ -445,12 +445,38 @@ struct iommufd_fault {
>
> /* The lists of outstanding faults protected by below mutex. */
It's better to update the above comment as well.
> struct mutex mutex;
> + spinlock_t lock; /* protects the deliver list */
> struct list_head deliver;
> struct xarray response;
>
> struct wait_queue_head wait_queue;
> };
>
> +/* Extract the first node out of the fault->deliver list */
> +static inline struct iopf_group *
> +iommufd_fault_deliver_extract(struct iommufd_fault *fault)
> +{
> + struct list_head *list = &fault->deliver;
> + struct iopf_group *group = NULL;
> +
> + spin_lock(&fault->lock);
> + if (!list_empty(list)) {
> + group = list_first_entry(list, struct iopf_group, node);
> + list_del(&group->node);
> + }
> + spin_unlock(&fault->lock);
> + return group;
> +}
> +
> +/* Restore a node back to the head in fault->deliver */
> +static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
> + struct iopf_group *group)
> +{
> + spin_lock(&fault->lock);
> + list_add(&fault->deliver, &group->node);
This is not right. It should be
list_add(&group->node, &fault->deliver);
> + spin_unlock(&fault->lock);
> +}
Others look good to me. With above addressed,
Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
On Wed, Jan 15, 2025 at 01:24:53PM +0800, Baolu Lu wrote:
> On 1/15/25 07:28, Nicolin Chen wrote:
> > @@ -445,12 +445,38 @@ struct iommufd_fault {
> > /* The lists of outstanding faults protected by below mutex. */
>
> It's better to update the above comment as well.
>
> > struct mutex mutex;
> > + spinlock_t lock; /* protects the deliver list */
> > struct list_head deliver;
> > struct xarray response;
Ack. I'll do:
- /* The lists of outstanding faults protected by below mutex. */
- struct mutex mutex;
+ spinlock_t lock; /* protects the deliver list */
struct list_head deliver;
+ struct mutex mutex; /* serializes response flows */
struct xarray response;
> > struct wait_queue_head wait_queue;
> > };
> > +/* Extract the first node out of the fault->deliver list */
> > +static inline struct iopf_group *
> > +iommufd_fault_deliver_extract(struct iommufd_fault *fault)
> > +{
> > + struct list_head *list = &fault->deliver;
> > + struct iopf_group *group = NULL;
> > +
> > + spin_lock(&fault->lock);
> > + if (!list_empty(list)) {
> > + group = list_first_entry(list, struct iopf_group, node);
> > + list_del(&group->node);
> > + }
> > + spin_unlock(&fault->lock);
> > + return group;
> > +}
> > +
> > +/* Restore a node back to the head in fault->deliver */
> > +static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
> > + struct iopf_group *group)
> > +{
> > + spin_lock(&fault->lock);
> > + list_add(&fault->deliver, &group->node);
>
> This is not right. It should be
>
> list_add(&group->node, &fault->deliver);
>
> > + spin_unlock(&fault->lock);
> > +}
Oh, right!
> Others look good to me. With above addressed,
>
> Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
Thanks!
Nicolin
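For reference, with Baolu's correction folded in, the restore helper
would read as follows (arguments in list_add(new, head) order):

	/* Restore a node back to the head of fault->deliver */
	static inline void iommufd_fault_deliver_restore(struct iommufd_fault *fault,
							 struct iopf_group *group)
	{
		spin_lock(&fault->lock);
		list_add(&group->node, &fault->deliver);
		spin_unlock(&fault->lock);
	}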
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Wednesday, January 15, 2025 7:29 AM
>
> @@ -445,12 +445,38 @@ struct iommufd_fault {
>
> /* The lists of outstanding faults protected by below mutex. */
> struct mutex mutex;
> + spinlock_t lock; /* protects the deliver list */
> struct list_head deliver;
> struct xarray response;
Move 'mutex' together with response then?
>
> +/* Extract the first node out of the fault->deliver list */
> +static inline struct iopf_group *
> +iommufd_fault_deliver_extract(struct iommufd_fault *fault)
Probably simpler would be iommufd_fault_fetch()
> @@ -102,17 +102,19 @@ static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
> 				struct iommufd_attach_handle *handle)
> {
> struct iommufd_fault *fault = hwpt->fault;
> - struct iopf_group *group, *next;
> + struct iopf_group *group;
> unsigned long index;
>
> if (!fault)
> return;
>
> mutex_lock(&fault->mutex);
> - list_for_each_entry_safe(group, next, &fault->deliver, node) {
> - if (group->attach_handle != &handle->handle)
> + for (group = iommufd_fault_deliver_extract(fault); group;
> + group = iommufd_fault_deliver_extract(fault)) {
while ((group = iommufd_fault_fetch(fault))) {
...
}
> @@ -266,17 +268,20 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
> return -ESPIPE;
>
> mutex_lock(&fault->mutex);
> - while (!list_empty(&fault->deliver) && count > done) {
> - group = list_first_entry(&fault->deliver,
> - struct iopf_group, node);
> -
> - if (group->fault_count * fault_size > count - done)
> + for (group = iommufd_fault_deliver_extract(fault); group;
> + group = iommufd_fault_deliver_extract(fault)) {
> + if (done >= count ||
> + group->fault_count * fault_size > count - done) {
> + iommufd_fault_deliver_restore(fault, group);
> break;
> + }
>
> rc = xa_alloc(&fault->response, &group->cookie, group,
> xa_limit_32b, GFP_KERNEL);
> - if (rc)
> + if (rc) {
> + iommufd_fault_deliver_restore(fault, group);
> break;
> + }
The scope of the mutex can be reduced to just protect the smaller chunk
touching fault->response.
Otherwise looks good:
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
On Wed, Jan 15, 2025 at 05:24:44AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Wednesday, January 15, 2025 7:29 AM
> >
> > @@ -445,12 +445,38 @@ struct iommufd_fault {
> >
> > /* The lists of outstanding faults protected by below mutex. */
> > struct mutex mutex;
> > + spinlock_t lock; /* protects the deliver list */
> > struct list_head deliver;
> > struct xarray response;
>
> Move 'mutex' together with response then?
Ack.
> >
> > +/* Extract the first node out of the fault->deliver list */
> > +static inline struct iopf_group *
> > +iommufd_fault_deliver_extract(struct iommufd_fault *fault)
>
> Probably simpler would be iommufd_fault_fetch()
We have two lists, deliver and response, so I think that "deliver" is
necessary in the name. Yet, I can do "fetch" vs. "extract".
> > @@ -102,17 +102,19 @@ static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
> > 				struct iommufd_attach_handle *handle)
> > {
> > struct iommufd_fault *fault = hwpt->fault;
> > - struct iopf_group *group, *next;
> > + struct iopf_group *group;
> > unsigned long index;
> >
> > if (!fault)
> > return;
> >
> > mutex_lock(&fault->mutex);
> > - list_for_each_entry_safe(group, next, &fault->deliver, node) {
> > - if (group->attach_handle != &handle->handle)
> > + for (group = iommufd_fault_deliver_extract(fault); group;
> > + group = iommufd_fault_deliver_extract(fault)) {
>
> while ((group = iommufd_fault_fetch(fault))) {
> ...
> }
Ah, right... how did I not see this, lol.
>
> > @@ -266,17 +268,20 @@ static ssize_t iommufd_fault_fops_read(struct file *filep, char __user *buf,
> > return -ESPIPE;
> >
> > mutex_lock(&fault->mutex);
> > - while (!list_empty(&fault->deliver) && count > done) {
> > - group = list_first_entry(&fault->deliver,
> > - struct iopf_group, node);
> > -
> > - if (group->fault_count * fault_size > count - done)
> > + for (group = iommufd_fault_deliver_extract(fault); group;
> > + group = iommufd_fault_deliver_extract(fault)) {
> > + if (done >= count ||
> > + group->fault_count * fault_size > count - done) {
> > + iommufd_fault_deliver_restore(fault, group);
> > break;
> > + }
> >
> > rc = xa_alloc(&fault->response, &group->cookie, group,
> > xa_limit_32b, GFP_KERNEL);
> > - if (rc)
> > + if (rc) {
> > + iommufd_fault_deliver_restore(fault, group);
> > break;
> > + }
>
> The scope of the mutex can be reduced to just protect the smaller chunk
> touching fault->response.
Ack.
> Otherwise looks good:
>
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Thanks!
Nicolin
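Putting Kevin's two suggestions together, the read() loop would end up
looking roughly like this (a sketch; the helper name follows the
"fetch" rename discussed above, and the exact mutex boundaries in the
final version are an assumption):

	while ((group = iommufd_fault_deliver_fetch(fault))) {
		if (done >= count ||
		    group->fault_count * fault_size > count - done) {
			iommufd_fault_deliver_restore(fault, group);
			break;
		}

		/* fault->mutex now only serializes fault->response */
		mutex_lock(&fault->mutex);
		rc = xa_alloc(&fault->response, &group->cookie, group,
			      xa_limit_32b, GFP_KERNEL);
		mutex_unlock(&fault->mutex);
		if (rc) {
			iommufd_fault_deliver_restore(fault, group);
			break;
		}

		/* copy the faults to userspace as in the patch above */
	}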