[PATCH v4 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages

Junxian Huang posted 2 patches 1 week, 6 days ago
There is a newer version of this series
[PATCH v4 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages
Posted by Junxian Huang 1 week, 6 days ago
From: Chengchang Tang <tangchengchang@huawei.com>

Provide a new api rdma_user_mmap_disassociate() for drivers to
disassociate mmap pages for a device.

Signed-off-by: Chengchang Tang <tangchengchang@huawei.com>
Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
---
 drivers/infiniband/core/uverbs.h      |  3 ++
 drivers/infiniband/core/uverbs_main.c | 55 +++++++++++++++++++++++++--
 include/rdma/ib_verbs.h               |  8 ++++
 3 files changed, 62 insertions(+), 4 deletions(-)

diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h
index 821d93c8f712..05b589aad5ef 100644
--- a/drivers/infiniband/core/uverbs.h
+++ b/drivers/infiniband/core/uverbs.h
@@ -160,6 +160,9 @@ struct ib_uverbs_file {
 	struct page *disassociate_page;
 
 	struct xarray		idr;
+
+	struct mutex disassociation_lock;
+	bool disassociated;
 };
 
 struct ib_uverbs_event {
diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c
index bc099287de9a..7c97df5d1a69 100644
--- a/drivers/infiniband/core/uverbs_main.c
+++ b/drivers/infiniband/core/uverbs_main.c
@@ -76,6 +76,7 @@ static dev_t dynamic_uverbs_dev;
 static DEFINE_IDA(uverbs_ida);
 static int ib_uverbs_add_one(struct ib_device *device);
 static void ib_uverbs_remove_one(struct ib_device *device, void *client_data);
+static struct ib_client uverbs_client;
 
 static char *uverbs_devnode(const struct device *dev, umode_t *mode)
 {
@@ -217,6 +218,7 @@ void ib_uverbs_release_file(struct kref *ref)
 
 	if (file->disassociate_page)
 		__free_pages(file->disassociate_page, 0);
+	mutex_destroy(&file->disassociation_lock);
 	mutex_destroy(&file->umap_lock);
 	mutex_destroy(&file->ucontext_lock);
 	kfree(file);
@@ -698,11 +700,20 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
 	ucontext = ib_uverbs_get_ucontext_file(file);
 	if (IS_ERR(ucontext)) {
 		ret = PTR_ERR(ucontext);
-		goto out;
+		goto out_srcu;
 	}
+
+	mutex_lock(&file->disassociation_lock);
+	if (file->disassociated) {
+		ret = -EPERM;
+		goto out_mutex;
+	}
+
 	vma->vm_ops = &rdma_umap_ops;
 	ret = ucontext->device->ops.mmap(ucontext, vma);
-out:
+out_mutex:
+	mutex_unlock(&file->disassociation_lock);
+out_srcu:
 	srcu_read_unlock(&file->device->disassociate_srcu, srcu_key);
 	return ret;
 }
@@ -723,10 +734,12 @@ static void rdma_umap_open(struct vm_area_struct *vma)
 	/* We are racing with disassociation */
 	if (!down_read_trylock(&ufile->hw_destroy_rwsem))
 		goto out_zap;
+	mutex_lock(&ufile->disassociation_lock);
+
 	/*
 	 * Disassociation already completed, the VMA should already be zapped.
 	 */
-	if (!ufile->ucontext)
+	if (!ufile->ucontext || ufile->disassociated)
 		goto out_unlock;
 
 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
@@ -734,10 +747,12 @@ static void rdma_umap_open(struct vm_area_struct *vma)
 		goto out_unlock;
 	rdma_umap_priv_init(priv, vma, opriv->entry);
 
+	mutex_unlock(&ufile->disassociation_lock);
 	up_read(&ufile->hw_destroy_rwsem);
 	return;
 
 out_unlock:
+	mutex_unlock(&ufile->disassociation_lock);
 	up_read(&ufile->hw_destroy_rwsem);
 out_zap:
 	/*
@@ -822,6 +837,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 	struct rdma_umap_priv *priv, *next_priv;
 
 	lockdep_assert_held(&ufile->hw_destroy_rwsem);
+	mutex_lock(&ufile->disassociation_lock);
+	ufile->disassociated = true;
 
 	while (1) {
 		struct mm_struct *mm = NULL;
@@ -847,8 +864,10 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 			break;
 		}
 		mutex_unlock(&ufile->umap_lock);
-		if (!mm)
+		if (!mm) {
+			mutex_unlock(&ufile->disassociation_lock);
 			return;
+		}
 
 		/*
 		 * The umap_lock is nested under mmap_lock since it used within
@@ -878,8 +897,34 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
 		mmap_read_unlock(mm);
 		mmput(mm);
 	}
+
+	mutex_unlock(&ufile->disassociation_lock);
 }
 
+/**
+ * rdma_user_mmap_disassociate() - Revoke mmaps for a device
+ * @device: device to revoke
+ *
+ * This function should be called by drivers that need to disable mmaps for the
+ * device, for instance because it is going to be reset.
+ */
+void rdma_user_mmap_disassociate(struct ib_device *device)
+{
+	struct ib_uverbs_device *uverbs_dev =
+		ib_get_client_data(device, &uverbs_client);
+	struct ib_uverbs_file *ufile;
+
+	mutex_lock(&uverbs_dev->lists_mutex);
+	list_for_each_entry(ufile, &uverbs_dev->uverbs_file_list, list) {
+		down_read(&ufile->hw_destroy_rwsem);
+		if (ufile->ucontext && !ufile->disassociated)
+			uverbs_user_mmap_disassociate(ufile);
+		up_read(&ufile->hw_destroy_rwsem);
+	}
+	mutex_unlock(&uverbs_dev->lists_mutex);
+}
+EXPORT_SYMBOL(rdma_user_mmap_disassociate);
+
 /*
  * ib_uverbs_open() does not need the BKL:
  *
@@ -949,6 +994,8 @@ static int ib_uverbs_open(struct inode *inode, struct file *filp)
 	mutex_init(&file->umap_lock);
 	INIT_LIST_HEAD(&file->umaps);
 
+	mutex_init(&file->disassociation_lock);
+
 	filp->private_data = file;
 	list_add_tail(&file->list, &dev->uverbs_file_list);
 	mutex_unlock(&dev->lists_mutex);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index a1dcf812d787..09b80c8253e2 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -2948,6 +2948,14 @@ int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
 				      size_t length, u32 min_pgoff,
 				      u32 max_pgoff);
 
+#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)
+void rdma_user_mmap_disassociate(struct ib_device *device);
+#else
+static inline void rdma_user_mmap_disassociate(struct ib_device *device)
+{
+}
+#endif
+
 static inline int
 rdma_user_mmap_entry_insert_exact(struct ib_ucontext *ucontext,
 				  struct rdma_user_mmap_entry *entry,
-- 
2.33.0
Re: [PATCH v4 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages
Posted by Jason Gunthorpe 1 week, 4 days ago
On Thu, Sep 05, 2024 at 09:11:54PM +0800, Junxian Huang wrote:

> @@ -698,11 +700,20 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
>  	ucontext = ib_uverbs_get_ucontext_file(file);
>  	if (IS_ERR(ucontext)) {
>  		ret = PTR_ERR(ucontext);
> -		goto out;
> +		goto out_srcu;
>  	}
> +
> +	mutex_lock(&file->disassociation_lock);
> +	if (file->disassociated) {
> +		ret = -EPERM;
> +		goto out_mutex;
> +	}

What sets disassociated back to false once the driver reset is
completed?

I think you should probably drop this and instead add a lock and test
inside the driver within its mmap op. While reset is ongoing fail all
new mmaps.

>  	/*
>  	 * Disassociation already completed, the VMA should already be zapped.
>  	 */
> -	if (!ufile->ucontext)
> +	if (!ufile->ucontext || ufile->disassociated)
>  		goto out_unlock;

Is this needed? It protects agains fork, but since the driver is still
present I wonder if it is OK

> @@ -822,6 +837,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>  	struct rdma_umap_priv *priv, *next_priv;
>  
>  	lockdep_assert_held(&ufile->hw_destroy_rwsem);
> +	mutex_lock(&ufile->disassociation_lock);
> +	ufile->disassociated = true;

I think this doesn't need the hw_destroy_rwsem anymore since you are
using this new disassociation_lock instead. It doesn't make alot of
sense to hold the hw_destroy_rwsem for read here, it was ment to be
held for write.

Jason
Re: [PATCH v4 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages
Posted by Junxian Huang 1 week, 2 days ago

On 2024/9/7 20:12, Jason Gunthorpe wrote:
> On Thu, Sep 05, 2024 at 09:11:54PM +0800, Junxian Huang wrote:
> 
>> @@ -698,11 +700,20 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
>>  	ucontext = ib_uverbs_get_ucontext_file(file);
>>  	if (IS_ERR(ucontext)) {
>>  		ret = PTR_ERR(ucontext);
>> -		goto out;
>> +		goto out_srcu;
>>  	}
>> +
>> +	mutex_lock(&file->disassociation_lock);
>> +	if (file->disassociated) {
>> +		ret = -EPERM;
>> +		goto out_mutex;
>> +	}
> 
> What sets disassociated back to false once the driver reset is
> completed?
> 
> I think you should probably drop this and instead add a lock and test
> inside the driver within its mmap op. While reset is ongoing fail all
> new mmaps.
> 

disassociated won't be set back to false. This is to stop new mmaps on
this ucontext even after reset is completed, because during hns reset,
all resources will be destroyed, and the ucontexts will become unavailable.

But of course, other drivers may handle this case differently from hns, so
I will remove disassociated here and put it in hns driver.

>>  	/*
>>  	 * Disassociation already completed, the VMA should already be zapped.
>>  	 */
>> -	if (!ufile->ucontext)
>> +	if (!ufile->ucontext || ufile->disassociated)
>>  		goto out_unlock;
> 
> Is this needed? It protects agains fork, but since the driver is still
> present I wonder if it is OK
> 

Will remove it too.

>> @@ -822,6 +837,8 @@ void uverbs_user_mmap_disassociate(struct ib_uverbs_file *ufile)
>>  	struct rdma_umap_priv *priv, *next_priv;
>>  
>>  	lockdep_assert_held(&ufile->hw_destroy_rwsem);
>> +	mutex_lock(&ufile->disassociation_lock);
>> +	ufile->disassociated = true;
> 
> I think this doesn't need the hw_destroy_rwsem anymore since you are
> using this new disassociation_lock instead. It doesn't make alot of
> sense to hold the hw_destroy_rwsem for read here, it was ment to be
> held for write.
> 

Then it seems we should remove the lockdep_assert_held() here?

Junxian

> Jason
Re: [PATCH v4 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages
Posted by Jason Gunthorpe 3 days, 12 hours ago
On Mon, Sep 09, 2024 at 04:41:00PM +0800, Junxian Huang wrote:
> On 2024/9/7 20:12, Jason Gunthorpe wrote:
> > On Thu, Sep 05, 2024 at 09:11:54PM +0800, Junxian Huang wrote:
> > 
> >> @@ -698,11 +700,20 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
> >>  	ucontext = ib_uverbs_get_ucontext_file(file);
> >>  	if (IS_ERR(ucontext)) {
> >>  		ret = PTR_ERR(ucontext);
> >> -		goto out;
> >> +		goto out_srcu;
> >>  	}
> >> +
> >> +	mutex_lock(&file->disassociation_lock);
> >> +	if (file->disassociated) {
> >> +		ret = -EPERM;
> >> +		goto out_mutex;
> >> +	}
> > 
> > What sets disassociated back to false once the driver reset is
> > completed?
> > 
> > I think you should probably drop this and instead add a lock and test
> > inside the driver within its mmap op. While reset is ongoing fail all
> > new mmaps.
> > 
> 
> disassociated won't be set back to false. This is to stop new mmaps on
> this ucontext even after reset is completed, because during hns reset,
> all resources will be destroyed, and the ucontexts will become unavailable.

That isn't really the model we are going for, the ucontext should be
recoverable even if the objects are not.

If you want to really fully destroy and fence things then you have to
unregister the whole ib_device

> > I think this doesn't need the hw_destroy_rwsem anymore since you are
> > using this new disassociation_lock instead. It doesn't make alot of
> > sense to hold the hw_destroy_rwsem for read here, it was ment to be
> > held for write.
> 
> Then it seems we should remove the lockdep_assert_held() here?

Yes

Jason
Re: [PATCH v4 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages
Posted by Leon Romanovsky 1 week ago
On Mon, Sep 09, 2024 at 04:41:00PM +0800, Junxian Huang wrote:
> 
> 
> On 2024/9/7 20:12, Jason Gunthorpe wrote:
> > On Thu, Sep 05, 2024 at 09:11:54PM +0800, Junxian Huang wrote:
> > 
> >> @@ -698,11 +700,20 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
> >>  	ucontext = ib_uverbs_get_ucontext_file(file);
> >>  	if (IS_ERR(ucontext)) {
> >>  		ret = PTR_ERR(ucontext);
> >> -		goto out;
> >> +		goto out_srcu;
> >>  	}
> >> +
> >> +	mutex_lock(&file->disassociation_lock);
> >> +	if (file->disassociated) {
> >> +		ret = -EPERM;
> >> +		goto out_mutex;
> >> +	}
> > 
> > What sets disassociated back to false once the driver reset is
> > completed?
> > 
> > I think you should probably drop this and instead add a lock and test
> > inside the driver within its mmap op. While reset is ongoing fail all
> > new mmaps.
> > 
> 
> disassociated won't be set back to false. This is to stop new mmaps on
> this ucontext even after reset is completed, because during hns reset,
> all resources will be destroyed, and the ucontexts will become unavailable.

ucontext is SW object and not HW object, why will it become unavailable?

Thanks
Re: [PATCH v4 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages
Posted by Junxian Huang 1 week ago

On 2024/9/11 18:20, Leon Romanovsky wrote:
> On Mon, Sep 09, 2024 at 04:41:00PM +0800, Junxian Huang wrote:
>>
>>
>> On 2024/9/7 20:12, Jason Gunthorpe wrote:
>>> On Thu, Sep 05, 2024 at 09:11:54PM +0800, Junxian Huang wrote:
>>>
>>>> @@ -698,11 +700,20 @@ static int ib_uverbs_mmap(struct file *filp, struct vm_area_struct *vma)
>>>>  	ucontext = ib_uverbs_get_ucontext_file(file);
>>>>  	if (IS_ERR(ucontext)) {
>>>>  		ret = PTR_ERR(ucontext);
>>>> -		goto out;
>>>> +		goto out_srcu;
>>>>  	}
>>>> +
>>>> +	mutex_lock(&file->disassociation_lock);
>>>> +	if (file->disassociated) {
>>>> +		ret = -EPERM;
>>>> +		goto out_mutex;
>>>> +	}
>>>
>>> What sets disassociated back to false once the driver reset is
>>> completed?
>>>
>>> I think you should probably drop this and instead add a lock and test
>>> inside the driver within its mmap op. While reset is ongoing fail all
>>> new mmaps.
>>>
>>
>> disassociated won't be set back to false. This is to stop new mmaps on
>> this ucontext even after reset is completed, because during hns reset,
>> all resources will be destroyed, and the ucontexts will become unavailable.
> 
> ucontext is SW object and not HW object, why will it become unavailable?
> 

Once hns device is reset, we don't allow any doorbell until driver's
re-initialization is completed. Not only all existing mmaps on ucontexts
will be zapped, no more new mmaps are allowed either.

This actually makes ucontexts unavailable since userspace cannot access
HW with them any more. Users will have to destroy the old ucontext and
allocate a new one after driver's re-initialization is completed.

Junxian

> Thanks
Re: [PATCH v4 for-next 1/2] RDMA/core: Provide rdma_user_mmap_disassociate() to disassociate mmap pages
Posted by Jason Gunthorpe 3 days, 12 hours ago
On Wed, Sep 11, 2024 at 08:38:35PM +0800, Junxian Huang wrote:

> Once hns device is reset, we don't allow any doorbell until driver's
> re-initialization is completed. Not only all existing mmaps on ucontexts
> will be zapped, no more new mmaps are allowed either.
> 
> This actually makes ucontexts unavailable since userspace cannot access
> HW with them any more. Users will have to destroy the old ucontext and
> allocate a new one after driver's re-initialization is completed.

It is supposed to come back once the reset is completed, yes userspace
may need to restart some objects but the ucontext shouldn't be left
crippled.

Jason