[PATCH v7 1/7] vduse: make domain_lock an rwlock

Eugenio Pérez posted 7 patches 2 months, 1 week ago
There is a newer version of this series
[PATCH v7 1/7] vduse: make domain_lock an rwlock
Posted by Eugenio Pérez 2 months, 1 week ago
It will be used in a few more scenarios read-only so make it more
scalable.

Suggested-by: Xie Yongji <xieyongji@bytedance.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Reviewed-by: Xie Yongji <xieyongji@bytedance.com>
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
---
v6: Not including linux/rwlock.h directly.

v2: New in v2.
---
 drivers/vdpa/vdpa_user/vduse_dev.c | 41 +++++++++++++++---------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index e7bced0b5542..321092bfb2a6 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -14,6 +14,7 @@
 #include <linux/cdev.h>
 #include <linux/device.h>
 #include <linux/eventfd.h>
+#include <linux/spinlock.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
 #include <linux/dma-map-ops.h>
@@ -117,7 +118,7 @@ struct vduse_dev {
 	struct vduse_umem *umem;
 	struct mutex mem_lock;
 	unsigned int bounce_size;
-	struct mutex domain_lock;
+	rwlock_t domain_lock;
 };
 
 struct vduse_dev_msg {
@@ -1176,9 +1177,9 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 		if (entry.start > entry.last)
 			break;
 
-		mutex_lock(&dev->domain_lock);
+		read_lock(&dev->domain_lock);
 		if (!dev->domain) {
-			mutex_unlock(&dev->domain_lock);
+			read_unlock(&dev->domain_lock);
 			break;
 		}
 		spin_lock(&dev->domain->iotlb_lock);
@@ -1193,7 +1194,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 			entry.perm = map->perm;
 		}
 		spin_unlock(&dev->domain->iotlb_lock);
-		mutex_unlock(&dev->domain_lock);
+		read_unlock(&dev->domain_lock);
 		ret = -EINVAL;
 		if (!f)
 			break;
@@ -1346,10 +1347,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 				 sizeof(umem.reserved)))
 			break;
 
-		mutex_lock(&dev->domain_lock);
+		write_lock(&dev->domain_lock);
 		ret = vduse_dev_reg_umem(dev, umem.iova,
 					 umem.uaddr, umem.size);
-		mutex_unlock(&dev->domain_lock);
+		write_unlock(&dev->domain_lock);
 		break;
 	}
 	case VDUSE_IOTLB_DEREG_UMEM: {
@@ -1363,10 +1364,10 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 		if (!is_mem_zero((const char *)umem.reserved,
 				 sizeof(umem.reserved)))
 			break;
-		mutex_lock(&dev->domain_lock);
+		write_lock(&dev->domain_lock);
 		ret = vduse_dev_dereg_umem(dev, umem.iova,
 					   umem.size);
-		mutex_unlock(&dev->domain_lock);
+		write_unlock(&dev->domain_lock);
 		break;
 	}
 	case VDUSE_IOTLB_GET_INFO: {
@@ -1385,9 +1386,9 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 				 sizeof(info.reserved)))
 			break;
 
-		mutex_lock(&dev->domain_lock);
+		read_lock(&dev->domain_lock);
 		if (!dev->domain) {
-			mutex_unlock(&dev->domain_lock);
+			read_unlock(&dev->domain_lock);
 			break;
 		}
 		spin_lock(&dev->domain->iotlb_lock);
@@ -1402,7 +1403,7 @@ static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
 				info.capability |= VDUSE_IOVA_CAP_UMEM;
 		}
 		spin_unlock(&dev->domain->iotlb_lock);
-		mutex_unlock(&dev->domain_lock);
+		read_unlock(&dev->domain_lock);
 		if (!map)
 			break;
 
@@ -1425,10 +1426,10 @@ static int vduse_dev_release(struct inode *inode, struct file *file)
 {
 	struct vduse_dev *dev = file->private_data;
 
-	mutex_lock(&dev->domain_lock);
+	write_lock(&dev->domain_lock);
 	if (dev->domain)
 		vduse_dev_dereg_umem(dev, 0, dev->domain->bounce_size);
-	mutex_unlock(&dev->domain_lock);
+	write_unlock(&dev->domain_lock);
 	spin_lock(&dev->msg_lock);
 	/* Make sure the inflight messages can processed after reconncection */
 	list_splice_init(&dev->recv_list, &dev->send_list);
@@ -1647,7 +1648,7 @@ static struct vduse_dev *vduse_dev_create(void)
 
 	mutex_init(&dev->lock);
 	mutex_init(&dev->mem_lock);
-	mutex_init(&dev->domain_lock);
+	rwlock_init(&dev->domain_lock);
 	spin_lock_init(&dev->msg_lock);
 	INIT_LIST_HEAD(&dev->send_list);
 	INIT_LIST_HEAD(&dev->recv_list);
@@ -1805,7 +1806,7 @@ static ssize_t bounce_size_store(struct device *device,
 	int ret;
 
 	ret = -EPERM;
-	mutex_lock(&dev->domain_lock);
+	write_lock(&dev->domain_lock);
 	if (dev->domain)
 		goto unlock;
 
@@ -1821,7 +1822,7 @@ static ssize_t bounce_size_store(struct device *device,
 	dev->bounce_size = bounce_size & PAGE_MASK;
 	ret = count;
 unlock:
-	mutex_unlock(&dev->domain_lock);
+	write_unlock(&dev->domain_lock);
 	return ret;
 }
 
@@ -2045,11 +2046,11 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
 	if (ret)
 		return ret;
 
-	mutex_lock(&dev->domain_lock);
+	write_lock(&dev->domain_lock);
 	if (!dev->domain)
 		dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
 						  dev->bounce_size);
-	mutex_unlock(&dev->domain_lock);
+	write_unlock(&dev->domain_lock);
 	if (!dev->domain) {
 		put_device(&dev->vdev->vdpa.dev);
 		return -ENOMEM;
@@ -2059,10 +2060,10 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
 	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
 	if (ret) {
 		put_device(&dev->vdev->vdpa.dev);
-		mutex_lock(&dev->domain_lock);
+		write_lock(&dev->domain_lock);
 		vduse_domain_destroy(dev->domain);
 		dev->domain = NULL;
-		mutex_unlock(&dev->domain_lock);
+		write_unlock(&dev->domain_lock);
 		return ret;
 	}
 
-- 
2.51.0

Re: [PATCH v7 1/7] vduse: make domain_lock an rwlock
Posted by Michael S. Tsirkin 2 months, 1 week ago
On Fri, Oct 10, 2025 at 10:58:21AM +0200, Eugenio Pérez wrote:
> It will be used in a few more scenarios read-only so make it more
> scalable.

Well a mutex is sleepable and rwlock is a spinlock.

So this does much more than "make it more scalable".

"A few more scenarios" and "scalable" is also vague.
RW has its own issues such as fairness.

So tell us please,  which operations do you want to speed up and why?
What kind of speedup was observed?

All this belongs in the commit log.

> Suggested-by: Xie Yongji <xieyongji@bytedance.com>
> Acked-by: Jason Wang <jasowang@redhat.com>
> Reviewed-by: Xie Yongji <xieyongji@bytedance.com>
> Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
> ---
> v6: Not including linux/rwlock.h directly.
> 
> v2: New in v2.
> ---


...


> @@ -2045,11 +2046,11 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
>  	if (ret)
>  		return ret;
>  
> -	mutex_lock(&dev->domain_lock);
> +	write_lock(&dev->domain_lock);
>  	if (!dev->domain)
>  		dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
>  						  dev->bounce_size);
> -	mutex_unlock(&dev->domain_lock);
> +	write_unlock(&dev->domain_lock);
>  	if (!dev->domain) {
>  		put_device(&dev->vdev->vdpa.dev);
>  		return -ENOMEM;


Let's look at this example:

So now you are invoking this under an rw lock:




struct vduse_iova_domain *
vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
{                                
        struct vduse_iova_domain *domain;
        struct file *file;
        struct vduse_bounce_map *map;
        unsigned long pfn, bounce_pfns;
        int ret;
        
        bounce_pfns = PAGE_ALIGN(bounce_size) >> BOUNCE_MAP_SHIFT;
        if (iova_limit <= bounce_size)
                return NULL;
        
        domain = kzalloc(sizeof(*domain), GFP_KERNEL);
        if (!domain)
                return NULL;


...



Which unless I am mistaken will produce a lockdep splat and deadlock.


So it looks like the previous version did not compile
and this one looks DOA.  What's up?

At this stage please include information about configs you
tested, and how.

And any locking changes should also be tested with lockdep enabled
please.


-- 
MST
Re: [PATCH v7 1/7] vduse: make domain_lock an rwlock
Posted by Eugenio Perez Martin 1 month, 3 weeks ago
On Fri, Oct 10, 2025 at 2:34 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>

...

>
>
> > @@ -2045,11 +2046,11 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
> >       if (ret)
> >               return ret;
> >
> > -     mutex_lock(&dev->domain_lock);
> > +     write_lock(&dev->domain_lock);
> >       if (!dev->domain)
> >               dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
> >                                                 dev->bounce_size);
> > -     mutex_unlock(&dev->domain_lock);
> > +     write_unlock(&dev->domain_lock);
> >       if (!dev->domain) {
> >               put_device(&dev->vdev->vdpa.dev);
> >               return -ENOMEM;
>
>
> Let's look at this example:
>
> So now you are invoking this under an rw lock:
>
>
>
>
> struct vduse_iova_domain *
> vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
> {
>         struct vduse_iova_domain *domain;
>         struct file *file;
>         struct vduse_bounce_map *map;
>         unsigned long pfn, bounce_pfns;
>         int ret;
>
>         bounce_pfns = PAGE_ALIGN(bounce_size) >> BOUNCE_MAP_SHIFT;
>         if (iova_limit <= bounce_size)
>                 return NULL;
>
>         domain = kzalloc(sizeof(*domain), GFP_KERNEL);
>         if (!domain)
>                 return NULL;
>
>
> ...
>
>
>
> Which unless I am mistaken will produce a lockdep splat and deadlock.
>

Can you expand on this? All that code works with and without lockdep.

>
> So it looks like the previous version did not compile
> and this one looks DOA.  What's up?
>

In case it is useful, I forgot to enable some vendor vdpa drivers of
HW I don't have, and that was the reason why it didn't compile. But I
didn't send it without testing it of course.

> At this stage please include information about configs you
> tested, and how.
>
> And any locking changes should also be tested with lockdep enabled
> please.
>

This is the text I'm planning to include in the next series, please
let me know if you want me to expand:

Tested by creating a VDUSE device OVS with and without MQ, and live
migrating between two hosts back and forth while maintaining ping
alive in all the stages. All tested with and without lockdep.
Re: [PATCH v7 1/7] vduse: make domain_lock an rwlock
Posted by Michael S. Tsirkin 1 month, 3 weeks ago
On Thu, Oct 23, 2025 at 10:59:01AM +0200, Eugenio Perez Martin wrote:
> On Fri, Oct 10, 2025 at 2:34 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> 
> ...
> 
> >
> >
> > > @@ -2045,11 +2046,11 @@ static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name,
> > >       if (ret)
> > >               return ret;
> > >
> > > -     mutex_lock(&dev->domain_lock);
> > > +     write_lock(&dev->domain_lock);
> > >       if (!dev->domain)
> > >               dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
> > >                                                 dev->bounce_size);
> > > -     mutex_unlock(&dev->domain_lock);
> > > +     write_unlock(&dev->domain_lock);
> > >       if (!dev->domain) {
> > >               put_device(&dev->vdev->vdpa.dev);
> > >               return -ENOMEM;
> >
> >
> > Let's look at this example:
> >
> > So now you are invoking this under an rw lock:
> >
> >
> >
> >
> > struct vduse_iova_domain *
> > vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
> > {
> >         struct vduse_iova_domain *domain;
> >         struct file *file;
> >         struct vduse_bounce_map *map;
> >         unsigned long pfn, bounce_pfns;
> >         int ret;
> >
> >         bounce_pfns = PAGE_ALIGN(bounce_size) >> BOUNCE_MAP_SHIFT;
> >         if (iova_limit <= bounce_size)
> >                 return NULL;
> >
> >         domain = kzalloc(sizeof(*domain), GFP_KERNEL);
> >         if (!domain)
> >                 return NULL;
> >
> >
> > ...
> >
> >
> >
> > Which unless I am mistaken will produce a lockdep splat and deadlock.
> >
> 
> Can you expand on this? All that code works with and without lockdep.


GFP_KERNEL can sleep and if that happens and another thread runs and
will try to take the lock, it will spin forever.


> >
> > So it looks like the previous version did not compile
> > and this one looks DOA.  What's up?
> >
> 
> In case it is useful, I forgot to enable some vendor vdpa drivers of
> HW I don't have, and that was the reason why it didn't compile. But I
> didn't send it without testing it of course.
> 
> > At this stage please include information about configs you
> > tested, and how.
> >
> > And any locking changes should also be tested with lockdep enabled
> > please.
> >
> 
> This is the text I'm planning to include in the next series, please
> let me know if you want me to expand:
> 
> Tested by creating a VDUSE device OVS with and without MQ, and live
> migrating between two hosts back and forth while maintaining ping
> alive in all the stages. All tested with and without lockdep.

Re: [PATCH v7 1/7] vduse: make domain_lock an rwlock
Posted by Eugenio Perez Martin 2 months ago
On Fri, Oct 10, 2025 at 2:34 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>
> On Fri, Oct 10, 2025 at 10:58:21AM +0200, Eugenio Pérez wrote:
> > It will be used in a few more scenarios read-only so make it more
> > scalable.
>
> Well a mutex is sleepable and rwlock is a spinlock.
>
> So this does much more than "make it more scalable".
>
> "A few more scenarios" and "scalable" is also vague.
> RW has its own issues such as fairness.
>
> So tell us please,  which operations do you want to speed up and why?
> What kind of speedup was observed?
>

I think you're totally right here.

Yongji, would it work for you if we leave this conversion on top of
the series with a simple mutex, and we have bandwith to explore the
benefits of this? You were the one proposing it [1].

Thanks!

[1] https://lore.kernel.org/all/CACycT3v=_Nm6fefJGFEyoU+Xf5G=Kzi0sXhhaBHnJQZcG-4EqA@mail.gmail.com/