[PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER

Steve Sistare posted 7 patches 1 year, 5 months ago
[PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Steve Sistare 1 year, 5 months ago
Add an ioctl to transfer file descriptor ownership and pinned memory
accounting from one process to another.

This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
as that would unpin all physical pages, requiring them to be repinned in
the new process.  That would cost multiple seconds for large memories, and
be incurred during a virtual machine's pause time during live update.

Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
---
 drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
 drivers/vhost/vhost.c      | 15 ++++++++++++++
 drivers/vhost/vhost.h      |  1 +
 include/uapi/linux/vhost.h | 10 ++++++++++
 4 files changed, 67 insertions(+)

diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
index b49e5831b3f0..5cf55ca4ec02 100644
--- a/drivers/vhost/vdpa.c
+++ b/drivers/vhost/vdpa.c
@@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
 	return ret;
 }
 
+static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
+{
+	int r;
+	struct vhost_dev *vdev = &v->vdev;
+	struct mm_struct *mm_old = vdev->mm;
+	struct mm_struct *mm_new = current->mm;
+	long pinned_vm = v->pinned_vm;
+	unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
+
+	if (!mm_old)
+		return -EINVAL;
+	mmgrab(mm_old);
+
+	if (!v->vdpa->use_va &&
+	    pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
+		r = -ENOMEM;
+		goto out;
+	}
+	r = vhost_vdpa_bind_mm(v, mm_new);
+	if (r)
+		goto out;
+
+	r = vhost_dev_new_owner(vdev);
+	if (r) {
+		vhost_vdpa_bind_mm(v, mm_old);
+		goto out;
+	}
+
+	if (!v->vdpa->use_va) {
+		atomic64_sub(pinned_vm, &mm_old->pinned_vm);
+		atomic64_add(pinned_vm, &mm_new->pinned_vm);
+	}
+
+out:
+	mmdrop(mm_old);
+	return r;
+}
+
 static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
 				   void __user *argp)
 {
@@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
 	case VHOST_VDPA_RESUME:
 		r = vhost_vdpa_resume(v);
 		break;
+	case VHOST_NEW_OWNER:
+		r = vhost_vdpa_new_owner(v);
+		break;
 	default:
 		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
 		if (r == -ENOIOCTLCMD)
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index b60955682474..ab40ae50552f 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 }
 EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
 
+/* Caller should have device mutex */
+long vhost_dev_new_owner(struct vhost_dev *dev)
+{
+	if (dev->mm == current->mm)
+		return -EBUSY;
+
+	if (!vhost_dev_has_owner(dev))
+		return -EINVAL;
+
+	vhost_detach_mm(dev);
+	vhost_attach_mm(dev);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(vhost_dev_new_owner);
+
 static struct vhost_iotlb *iotlb_alloc(void)
 {
 	return vhost_iotlb_alloc(max_iotlb_entries,
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index bb75a292d50c..8b2018bb02b1 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -187,6 +187,7 @@ void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
 		    int (*msg_handler)(struct vhost_dev *dev, u32 asid,
 				       struct vhost_iotlb_msg *msg));
 long vhost_dev_set_owner(struct vhost_dev *dev);
+long vhost_dev_new_owner(struct vhost_dev *dev);
 bool vhost_dev_has_owner(struct vhost_dev *dev);
 long vhost_dev_check_owner(struct vhost_dev *);
 struct vhost_iotlb *vhost_dev_reset_owner_prepare(void);
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index b95dd84eef2d..543d0e3434c3 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -123,6 +123,16 @@
 #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
 #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
 
+/* Set current process as the new owner of this file descriptor.  The fd must
+ * already be owned, via a prior call to VHOST_SET_OWNER.  The pinned memory
+ * count is transferred from the previous to the new owner.
+ * Errors:
+ *   EINVAL: not owned
+ *   EBUSY:  caller is already the owner
+ *   ENOMEM: RLIMIT_MEMLOCK exceeded
+ */
+#define VHOST_NEW_OWNER _IO(VHOST_VIRTIO, 0x27)
+
 /* VHOST_NET specific defines */
 
 /* Attach virtio net ring to a raw socket, or tap device.
-- 
2.39.3
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Michael S. Tsirkin 1 year, 5 months ago
On Fri, Jul 12, 2024 at 06:18:49AM -0700, Steve Sistare wrote:
> Add an ioctl to transfer file descriptor ownership and pinned memory
> accounting from one process to another.
> 
> This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
> as that would unpin all physical pages, requiring them to be repinned in
> the new process.  That would cost multiple seconds for large memories, and
> be incurred during a virtual machine's pause time during live update.
> 
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>

Please, we just need to switch to use iommufd for pinning.
Piling up all these hacks gets us nowhere.


> ---
>  drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
>  drivers/vhost/vhost.c      | 15 ++++++++++++++
>  drivers/vhost/vhost.h      |  1 +
>  include/uapi/linux/vhost.h | 10 ++++++++++
>  4 files changed, 67 insertions(+)
> 
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index b49e5831b3f0..5cf55ca4ec02 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
>  	return ret;
>  }
>  
> +static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
> +{
> +	int r;
> +	struct vhost_dev *vdev = &v->vdev;
> +	struct mm_struct *mm_old = vdev->mm;
> +	struct mm_struct *mm_new = current->mm;
> +	long pinned_vm = v->pinned_vm;
> +	unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
> +
> +	if (!mm_old)
> +		return -EINVAL;
> +	mmgrab(mm_old);
> +
> +	if (!v->vdpa->use_va &&
> +	    pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
> +		r = -ENOMEM;
> +		goto out;
> +	}
> +	r = vhost_vdpa_bind_mm(v, mm_new);
> +	if (r)
> +		goto out;
> +
> +	r = vhost_dev_new_owner(vdev);
> +	if (r) {
> +		vhost_vdpa_bind_mm(v, mm_old);
> +		goto out;
> +	}
> +
> +	if (!v->vdpa->use_va) {
> +		atomic64_sub(pinned_vm, &mm_old->pinned_vm);
> +		atomic64_add(pinned_vm, &mm_new->pinned_vm);
> +	}
> +
> +out:
> +	mmdrop(mm_old);
> +	return r;
> +}
> +
>  static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>  				   void __user *argp)
>  {
> @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
>  	case VHOST_VDPA_RESUME:
>  		r = vhost_vdpa_resume(v);
>  		break;
> +	case VHOST_NEW_OWNER:
> +		r = vhost_vdpa_new_owner(v);
> +		break;
>  	default:
>  		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
>  		if (r == -ENOIOCTLCMD)
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index b60955682474..ab40ae50552f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
>  
> +/* Caller should have device mutex */
> +long vhost_dev_new_owner(struct vhost_dev *dev)
> +{
> +	if (dev->mm == current->mm)
> +		return -EBUSY;
> +
> +	if (!vhost_dev_has_owner(dev))
> +		return -EINVAL;
> +
> +	vhost_detach_mm(dev);
> +	vhost_attach_mm(dev);
> +	return 0;
> +}
> +EXPORT_SYMBOL_GPL(vhost_dev_new_owner);
> +
>  static struct vhost_iotlb *iotlb_alloc(void)
>  {
>  	return vhost_iotlb_alloc(max_iotlb_entries,
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index bb75a292d50c..8b2018bb02b1 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -187,6 +187,7 @@ void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
>  		    int (*msg_handler)(struct vhost_dev *dev, u32 asid,
>  				       struct vhost_iotlb_msg *msg));
>  long vhost_dev_set_owner(struct vhost_dev *dev);
> +long vhost_dev_new_owner(struct vhost_dev *dev);
>  bool vhost_dev_has_owner(struct vhost_dev *dev);
>  long vhost_dev_check_owner(struct vhost_dev *);
>  struct vhost_iotlb *vhost_dev_reset_owner_prepare(void);
> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
> index b95dd84eef2d..543d0e3434c3 100644
> --- a/include/uapi/linux/vhost.h
> +++ b/include/uapi/linux/vhost.h
> @@ -123,6 +123,16 @@
>  #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
>  #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
>  
> +/* Set current process as the new owner of this file descriptor.  The fd must
> + * already be owned, via a prior call to VHOST_SET_OWNER.  The pinned memory
> + * count is transferred from the previous to the new owner.
> + * Errors:
> + *   EINVAL: not owned
> + *   EBUSY:  caller is already the owner
> + *   ENOMEM: RLIMIT_MEMLOCK exceeded
> + */
> +#define VHOST_NEW_OWNER _IO(VHOST_VIRTIO, 0x27)
> +
>  /* VHOST_NET specific defines */
>  
>  /* Attach virtio net ring to a raw socket, or tap device.
> -- 
> 2.39.3
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Steven Sistare 1 year, 5 months ago
On 7/15/2024 5:07 AM, Michael S. Tsirkin wrote:
> On Fri, Jul 12, 2024 at 06:18:49AM -0700, Steve Sistare wrote:
>> Add an ioctl to transfer file descriptor ownership and pinned memory
>> accounting from one process to another.
>>
>> This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
>> as that would unpin all physical pages, requiring them to be repinned in
>> the new process.  That would cost multiple seconds for large memories, and
>> be incurred during a virtual machine's pause time during live update.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> 
> Please, we just need to switch to use iommufd for pinning.
> Piling up all these hacks gets us nowhere.

I am working on iommufd kernel interfaces and QEMU changes.  But who is working
on iommufd support for vdpa? If no one, or not for years, then adding these
small interfaces to vdpa plugs a signficant gap in live update coverage.

FWIW, the iommufd interfaces for live update will look much the same: change owner
and pinned memory accounting, and update virtual addresses.  So adding that to vdpa
will not make it look like an odd duck.

- Steve

>> ---
>>   drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
>>   drivers/vhost/vhost.c      | 15 ++++++++++++++
>>   drivers/vhost/vhost.h      |  1 +
>>   include/uapi/linux/vhost.h | 10 ++++++++++
>>   4 files changed, 67 insertions(+)
>>
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index b49e5831b3f0..5cf55ca4ec02 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
>>   	return ret;
>>   }
>>   
>> +static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
>> +{
>> +	int r;
>> +	struct vhost_dev *vdev = &v->vdev;
>> +	struct mm_struct *mm_old = vdev->mm;
>> +	struct mm_struct *mm_new = current->mm;
>> +	long pinned_vm = v->pinned_vm;
>> +	unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
>> +
>> +	if (!mm_old)
>> +		return -EINVAL;
>> +	mmgrab(mm_old);
>> +
>> +	if (!v->vdpa->use_va &&
>> +	    pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
>> +		r = -ENOMEM;
>> +		goto out;
>> +	}
>> +	r = vhost_vdpa_bind_mm(v, mm_new);
>> +	if (r)
>> +		goto out;
>> +
>> +	r = vhost_dev_new_owner(vdev);
>> +	if (r) {
>> +		vhost_vdpa_bind_mm(v, mm_old);
>> +		goto out;
>> +	}
>> +
>> +	if (!v->vdpa->use_va) {
>> +		atomic64_sub(pinned_vm, &mm_old->pinned_vm);
>> +		atomic64_add(pinned_vm, &mm_new->pinned_vm);
>> +	}
>> +
>> +out:
>> +	mmdrop(mm_old);
>> +	return r;
>> +}
>> +
>>   static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>   				   void __user *argp)
>>   {
>> @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
>>   	case VHOST_VDPA_RESUME:
>>   		r = vhost_vdpa_resume(v);
>>   		break;
>> +	case VHOST_NEW_OWNER:
>> +		r = vhost_vdpa_new_owner(v);
>> +		break;
>>   	default:
>>   		r = vhost_dev_ioctl(&v->vdev, cmd, argp);
>>   		if (r == -ENOIOCTLCMD)
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index b60955682474..ab40ae50552f 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>>   }
>>   EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
>>   
>> +/* Caller should have device mutex */
>> +long vhost_dev_new_owner(struct vhost_dev *dev)
>> +{
>> +	if (dev->mm == current->mm)
>> +		return -EBUSY;
>> +
>> +	if (!vhost_dev_has_owner(dev))
>> +		return -EINVAL;
>> +
>> +	vhost_detach_mm(dev);
>> +	vhost_attach_mm(dev);
>> +	return 0;
>> +}
>> +EXPORT_SYMBOL_GPL(vhost_dev_new_owner);
>> +
>>   static struct vhost_iotlb *iotlb_alloc(void)
>>   {
>>   	return vhost_iotlb_alloc(max_iotlb_entries,
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index bb75a292d50c..8b2018bb02b1 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -187,6 +187,7 @@ void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs,
>>   		    int (*msg_handler)(struct vhost_dev *dev, u32 asid,
>>   				       struct vhost_iotlb_msg *msg));
>>   long vhost_dev_set_owner(struct vhost_dev *dev);
>> +long vhost_dev_new_owner(struct vhost_dev *dev);
>>   bool vhost_dev_has_owner(struct vhost_dev *dev);
>>   long vhost_dev_check_owner(struct vhost_dev *);
>>   struct vhost_iotlb *vhost_dev_reset_owner_prepare(void);
>> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
>> index b95dd84eef2d..543d0e3434c3 100644
>> --- a/include/uapi/linux/vhost.h
>> +++ b/include/uapi/linux/vhost.h
>> @@ -123,6 +123,16 @@
>>   #define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
>>   #define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
>>   
>> +/* Set current process as the new owner of this file descriptor.  The fd must
>> + * already be owned, via a prior call to VHOST_SET_OWNER.  The pinned memory
>> + * count is transferred from the previous to the new owner.
>> + * Errors:
>> + *   EINVAL: not owned
>> + *   EBUSY:  caller is already the owner
>> + *   ENOMEM: RLIMIT_MEMLOCK exceeded
>> + */
>> +#define VHOST_NEW_OWNER _IO(VHOST_VIRTIO, 0x27)
>> +
>>   /* VHOST_NET specific defines */
>>   
>>   /* Attach virtio net ring to a raw socket, or tap device.
>> -- 
>> 2.39.3
>
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Michael S. Tsirkin 1 year, 5 months ago
On Mon, Jul 15, 2024 at 10:29:26AM -0400, Steven Sistare wrote:
> On 7/15/2024 5:07 AM, Michael S. Tsirkin wrote:
> > On Fri, Jul 12, 2024 at 06:18:49AM -0700, Steve Sistare wrote:
> > > Add an ioctl to transfer file descriptor ownership and pinned memory
> > > accounting from one process to another.
> > > 
> > > This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
> > > as that would unpin all physical pages, requiring them to be repinned in
> > > the new process.  That would cost multiple seconds for large memories, and
> > > be incurred during a virtual machine's pause time during live update.
> > > 
> > > Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> > 
> > Please, we just need to switch to use iommufd for pinning.
> > Piling up all these hacks gets us nowhere.
> 
> I am working on iommufd kernel interfaces and QEMU changes.  But who is working
> on iommufd support for vdpa? If no one, or not for years, then adding these
> small interfaces to vdpa plugs a signficant gap in live update coverage.
> 
> FWIW, the iommufd interfaces for live update will look much the same: change owner
> and pinned memory accounting, and update virtual addresses.  So adding that to vdpa
> will not make it look like an odd duck.
> 
> - Steve

I think that no one is working on it - Cindy posted some rfcs in January
("vhost-vdpa: add support for iommufd").  Feel free to pick that up.
What you described is just more of a reason not to duplicate this code.
And it's always the same: a small extension here, a small extension there.
If you can make do with existing kernel interfaces, fine,
one can argue that userspace code is useful to support existing kernels.

-- 
MST
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Steven Sistare 1 year, 5 months ago
On 7/15/2024 10:38 AM, Michael S. Tsirkin wrote:
> On Mon, Jul 15, 2024 at 10:29:26AM -0400, Steven Sistare wrote:
>> On 7/15/2024 5:07 AM, Michael S. Tsirkin wrote:
>>> On Fri, Jul 12, 2024 at 06:18:49AM -0700, Steve Sistare wrote:
>>>> Add an ioctl to transfer file descriptor ownership and pinned memory
>>>> accounting from one process to another.
>>>>
>>>> This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
>>>> as that would unpin all physical pages, requiring them to be repinned in
>>>> the new process.  That would cost multiple seconds for large memories, and
>>>> be incurred during a virtual machine's pause time during live update.
>>>>
>>>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>>>
>>> Please, we just need to switch to use iommufd for pinning.
>>> Piling up all these hacks gets us nowhere.
>>
>> I am working on iommufd kernel interfaces and QEMU changes.  But who is working
>> on iommufd support for vdpa? If no one, or not for years, then adding these
>> small interfaces to vdpa plugs a signficant gap in live update coverage.
>>
>> FWIW, the iommufd interfaces for live update will look much the same: change owner
>> and pinned memory accounting, and update virtual addresses.  So adding that to vdpa
>> will not make it look like an odd duck.
>>
>> - Steve
> 
> I think that no one is working on it - Cindy posted some rfcs in January
> ("vhost-vdpa: add support for iommufd").  Feel free to pick that up.
> What you described is just more of a reason not to duplicate this code.
> And it's always the same: a small extension here, a small extension there.
> If you can make do with existing kernel interfaces, fine,
> one can argue that userspace code is useful to support existing kernels.

Respectfully, I will not be volunteering to take that on.  My work funnel to
deliver live update is already too wide.  I hope that folks on this distribution
find the functionality interesting and useful enough to continue to review this
current proposal.

- Steve
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Jason Wang 1 year, 5 months ago
On Fri, Jul 12, 2024 at 9:19 PM Steve Sistare <steven.sistare@oracle.com> wrote:
>
> Add an ioctl to transfer file descriptor ownership and pinned memory
> accounting from one process to another.
>
> This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
> as that would unpin all physical pages, requiring them to be repinned in
> the new process.  That would cost multiple seconds for large memories, and
> be incurred during a virtual machine's pause time during live update.
>
> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> ---
>  drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
>  drivers/vhost/vhost.c      | 15 ++++++++++++++
>  drivers/vhost/vhost.h      |  1 +
>  include/uapi/linux/vhost.h | 10 ++++++++++
>  4 files changed, 67 insertions(+)
>
> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> index b49e5831b3f0..5cf55ca4ec02 100644
> --- a/drivers/vhost/vdpa.c
> +++ b/drivers/vhost/vdpa.c
> @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
>         return ret;
>  }
>
> +static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
> +{
> +       int r;
> +       struct vhost_dev *vdev = &v->vdev;
> +       struct mm_struct *mm_old = vdev->mm;
> +       struct mm_struct *mm_new = current->mm;
> +       long pinned_vm = v->pinned_vm;
> +       unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
> +
> +       if (!mm_old)
> +               return -EINVAL;
> +       mmgrab(mm_old);
> +
> +       if (!v->vdpa->use_va &&
> +           pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
> +               r = -ENOMEM;
> +               goto out;
> +       }

So this seems to allow an arbitrary process to execute this. Seems to be unsafe.

I wonder if we need to add some checks here, maybe PID or other stuff
to only allow the owner process to do this.

> +       r = vhost_vdpa_bind_mm(v, mm_new);
> +       if (r)
> +               goto out;
> +
> +       r = vhost_dev_new_owner(vdev);
> +       if (r) {
> +               vhost_vdpa_bind_mm(v, mm_old);
> +               goto out;
> +       }
> +
> +       if (!v->vdpa->use_va) {
> +               atomic64_sub(pinned_vm, &mm_old->pinned_vm);
> +               atomic64_add(pinned_vm, &mm_new->pinned_vm);
> +       }
> +
> +out:
> +       mmdrop(mm_old);
> +       return r;
> +}
> +
>  static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>                                    void __user *argp)
>  {
> @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
>         case VHOST_VDPA_RESUME:
>                 r = vhost_vdpa_resume(v);
>                 break;
> +       case VHOST_NEW_OWNER:
> +               r = vhost_vdpa_new_owner(v);
> +               break;
>         default:
>                 r = vhost_dev_ioctl(&v->vdev, cmd, argp);
>                 if (r == -ENOIOCTLCMD)
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index b60955682474..ab40ae50552f 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>  }
>  EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
>
> +/* Caller should have device mutex */
> +long vhost_dev_new_owner(struct vhost_dev *dev)
> +{
> +       if (dev->mm == current->mm)
> +               return -EBUSY;
> +
> +       if (!vhost_dev_has_owner(dev))
> +               return -EINVAL;
> +
> +       vhost_detach_mm(dev);
> +       vhost_attach_mm(dev);

This seems to do nothing unless I miss something.

Thanks
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Steven Sistare 1 year, 5 months ago
On 7/14/2024 10:26 PM, Jason Wang wrote:
> On Fri, Jul 12, 2024 at 9:19 PM Steve Sistare <steven.sistare@oracle.com> wrote:
>>
>> Add an ioctl to transfer file descriptor ownership and pinned memory
>> accounting from one process to another.
>>
>> This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
>> as that would unpin all physical pages, requiring them to be repinned in
>> the new process.  That would cost multiple seconds for large memories, and
>> be incurred during a virtual machine's pause time during live update.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>   drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
>>   drivers/vhost/vhost.c      | 15 ++++++++++++++
>>   drivers/vhost/vhost.h      |  1 +
>>   include/uapi/linux/vhost.h | 10 ++++++++++
>>   4 files changed, 67 insertions(+)
>>
>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>> index b49e5831b3f0..5cf55ca4ec02 100644
>> --- a/drivers/vhost/vdpa.c
>> +++ b/drivers/vhost/vdpa.c
>> @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
>>          return ret;
>>   }
>>
>> +static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
>> +{
>> +       int r;
>> +       struct vhost_dev *vdev = &v->vdev;
>> +       struct mm_struct *mm_old = vdev->mm;
>> +       struct mm_struct *mm_new = current->mm;
>> +       long pinned_vm = v->pinned_vm;
>> +       unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
>> +
>> +       if (!mm_old)
>> +               return -EINVAL;
>> +       mmgrab(mm_old);
>> +
>> +       if (!v->vdpa->use_va &&
>> +           pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
>> +               r = -ENOMEM;
>> +               goto out;
>> +       }
> 
> So this seems to allow an arbitrary process to execute this. Seems to be unsafe.
> 
> I wonder if we need to add some checks here, maybe PID or other stuff
> to only allow the owner process to do this.

The original owner must send the file descriptor to the new owner.
That constitutes permission to take ownership.

>> +       r = vhost_vdpa_bind_mm(v, mm_new);
>> +       if (r)
>> +               goto out;
>> +
>> +       r = vhost_dev_new_owner(vdev);
>> +       if (r) {
>> +               vhost_vdpa_bind_mm(v, mm_old);
>> +               goto out;
>> +       }
>> +
>> +       if (!v->vdpa->use_va) {
>> +               atomic64_sub(pinned_vm, &mm_old->pinned_vm);
>> +               atomic64_add(pinned_vm, &mm_new->pinned_vm);
>> +       }
>> +
>> +out:
>> +       mmdrop(mm_old);
>> +       return r;
>> +}
>> +
>>   static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>                                     void __user *argp)
>>   {
>> @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
>>          case VHOST_VDPA_RESUME:
>>                  r = vhost_vdpa_resume(v);
>>                  break;
>> +       case VHOST_NEW_OWNER:
>> +               r = vhost_vdpa_new_owner(v);
>> +               break;
>>          default:
>>                  r = vhost_dev_ioctl(&v->vdev, cmd, argp);
>>                  if (r == -ENOIOCTLCMD)
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index b60955682474..ab40ae50552f 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>>   }
>>   EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
>>
>> +/* Caller should have device mutex */
>> +long vhost_dev_new_owner(struct vhost_dev *dev)
>> +{
>> +       if (dev->mm == current->mm)
>> +               return -EBUSY;
>> +
>> +       if (!vhost_dev_has_owner(dev))
>> +               return -EINVAL;
>> +
>> +       vhost_detach_mm(dev);
>> +       vhost_attach_mm(dev);
> 
> This seems to do nothing unless I miss something.

vhost_detach mm drops dev->mm.
vhost_attach_mm grabs current->mm.

- Steve
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Jason Wang 1 year, 5 months ago
On Mon, Jul 15, 2024 at 10:27 PM Steven Sistare
<steven.sistare@oracle.com> wrote:
>
> On 7/14/2024 10:26 PM, Jason Wang wrote:
> > On Fri, Jul 12, 2024 at 9:19 PM Steve Sistare <steven.sistare@oracle.com> wrote:
> >>
> >> Add an ioctl to transfer file descriptor ownership and pinned memory
> >> accounting from one process to another.
> >>
> >> This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
> >> as that would unpin all physical pages, requiring them to be repinned in
> >> the new process.  That would cost multiple seconds for large memories, and
> >> be incurred during a virtual machine's pause time during live update.
> >>
> >> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> >> ---
> >>   drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
> >>   drivers/vhost/vhost.c      | 15 ++++++++++++++
> >>   drivers/vhost/vhost.h      |  1 +
> >>   include/uapi/linux/vhost.h | 10 ++++++++++
> >>   4 files changed, 67 insertions(+)
> >>
> >> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >> index b49e5831b3f0..5cf55ca4ec02 100644
> >> --- a/drivers/vhost/vdpa.c
> >> +++ b/drivers/vhost/vdpa.c
> >> @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
> >>          return ret;
> >>   }
> >>
> >> +static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
> >> +{
> >> +       int r;
> >> +       struct vhost_dev *vdev = &v->vdev;
> >> +       struct mm_struct *mm_old = vdev->mm;
> >> +       struct mm_struct *mm_new = current->mm;
> >> +       long pinned_vm = v->pinned_vm;
> >> +       unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
> >> +
> >> +       if (!mm_old)
> >> +               return -EINVAL;
> >> +       mmgrab(mm_old);
> >> +
> >> +       if (!v->vdpa->use_va &&
> >> +           pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
> >> +               r = -ENOMEM;
> >> +               goto out;
> >> +       }
> >
> > So this seems to allow an arbitrary process to execute this. Seems to be unsafe.
> >
> > I wonder if we need to add some checks here, maybe PID or other stuff
> > to only allow the owner process to do this.
>
> The original owner must send the file descriptor to the new owner.

This seems not to be in the steps you put in the cover letter.

> That constitutes permission to take ownership.

This seems like a relaxed version of the reset_owner:

Currently, reset_owner have the following check:

/* Caller should have device mutex */
long vhost_dev_check_owner(struct vhost_dev *dev)
{
        /* Are you the owner? If not, I don't think you mean to do that */
        return dev->mm == current->mm ? 0 : -EPERM;
}
EXPORT_SYMBOL_GPL(vhost_dev_check_owner);

It means even if the fd is passed to some other process, the reset
owner won't work there.

Thanks

>
> >> +       r = vhost_vdpa_bind_mm(v, mm_new);
> >> +       if (r)
> >> +               goto out;
> >> +
> >> +       r = vhost_dev_new_owner(vdev);
> >> +       if (r) {
> >> +               vhost_vdpa_bind_mm(v, mm_old);
> >> +               goto out;
> >> +       }
> >> +
> >> +       if (!v->vdpa->use_va) {
> >> +               atomic64_sub(pinned_vm, &mm_old->pinned_vm);
> >> +               atomic64_add(pinned_vm, &mm_new->pinned_vm);
> >> +       }
> >> +
> >> +out:
> >> +       mmdrop(mm_old);
> >> +       return r;
> >> +}
> >> +
> >>   static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >>                                     void __user *argp)
> >>   {
> >> @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
> >>          case VHOST_VDPA_RESUME:
> >>                  r = vhost_vdpa_resume(v);
> >>                  break;
> >> +       case VHOST_NEW_OWNER:
> >> +               r = vhost_vdpa_new_owner(v);
> >> +               break;
> >>          default:
> >>                  r = vhost_dev_ioctl(&v->vdev, cmd, argp);
> >>                  if (r == -ENOIOCTLCMD)
> >> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> >> index b60955682474..ab40ae50552f 100644
> >> --- a/drivers/vhost/vhost.c
> >> +++ b/drivers/vhost/vhost.c
> >> @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
> >>   }
> >>   EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
> >>
> >> +/* Caller should have device mutex */
> >> +long vhost_dev_new_owner(struct vhost_dev *dev)
> >> +{
> >> +       if (dev->mm == current->mm)
> >> +               return -EBUSY;
> >> +
> >> +       if (!vhost_dev_has_owner(dev))
> >> +               return -EINVAL;
> >> +
> >> +       vhost_detach_mm(dev);
> >> +       vhost_attach_mm(dev);
> >
> > This seems to do nothing unless I miss something.
>
> vhost_detach mm drops dev->mm.
> vhost_attach_mm grabs current->mm.
>
> - Steve
>
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Steven Sistare 1 year, 5 months ago
On 7/16/2024 1:16 AM, Jason Wang wrote:
> On Mon, Jul 15, 2024 at 10:27 PM Steven Sistare
> <steven.sistare@oracle.com> wrote:
>>
>> On 7/14/2024 10:26 PM, Jason Wang wrote:
>>> On Fri, Jul 12, 2024 at 9:19 PM Steve Sistare <steven.sistare@oracle.com> wrote:
>>>>
>>>> Add an ioctl to transfer file descriptor ownership and pinned memory
>>>> accounting from one process to another.
>>>>
>>>> This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
>>>> as that would unpin all physical pages, requiring them to be repinned in
>>>> the new process.  That would cost multiple seconds for large memories, and
>>>> be incurred during a virtual machine's pause time during live update.
>>>>
>>>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>>>> ---
>>>>    drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
>>>>    drivers/vhost/vhost.c      | 15 ++++++++++++++
>>>>    drivers/vhost/vhost.h      |  1 +
>>>>    include/uapi/linux/vhost.h | 10 ++++++++++
>>>>    4 files changed, 67 insertions(+)
>>>>
>>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
>>>> index b49e5831b3f0..5cf55ca4ec02 100644
>>>> --- a/drivers/vhost/vdpa.c
>>>> +++ b/drivers/vhost/vdpa.c
>>>> @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
>>>>           return ret;
>>>>    }
>>>>
>>>> +static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
>>>> +{
>>>> +       int r;
>>>> +       struct vhost_dev *vdev = &v->vdev;
>>>> +       struct mm_struct *mm_old = vdev->mm;
>>>> +       struct mm_struct *mm_new = current->mm;
>>>> +       long pinned_vm = v->pinned_vm;
>>>> +       unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
>>>> +
>>>> +       if (!mm_old)
>>>> +               return -EINVAL;
>>>> +       mmgrab(mm_old);
>>>> +
>>>> +       if (!v->vdpa->use_va &&
>>>> +           pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
>>>> +               r = -ENOMEM;
>>>> +               goto out;
>>>> +       }
>>>
>>> So this seems to allow an arbitrary process to execute this. Seems to be unsafe.
>>>
>>> I wonder if we need to add some checks here, maybe PID or other stuff
>>> to only allow the owner process to do this.
>>
>> The original owner must send the file descriptor to the new owner.
> 
> This seems not to be in the steps you put in the cover letter.

It's there:
   "The vdpa device descriptor, fd, remains open across the exec."

But, I can say more about how fd visibility constitutes permission to changer
owner in this commit message.

>> That constitutes permission to take ownership.
> 
> This seems like a relaxed version of the reset_owner:
> 
> Currently, reset_owner have the following check:

Not relaxed, just different.  A process cannot do anything with fd if it
is not the owner, *except* for becoming the new owner.  Holding the fd is
like holding a key.

- Steve

> /* Caller should have device mutex */
> long vhost_dev_check_owner(struct vhost_dev *dev)
> {
>          /* Are you the owner? If not, I don't think you mean to do that */
>          return dev->mm == current->mm ? 0 : -EPERM;
> }
> EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
> 
> It means even if the fd is passed to some other process, the reset
> owner won't work there.
> 
> Thanks
> 
>>
>>>> +       r = vhost_vdpa_bind_mm(v, mm_new);
>>>> +       if (r)
>>>> +               goto out;
>>>> +
>>>> +       r = vhost_dev_new_owner(vdev);
>>>> +       if (r) {
>>>> +               vhost_vdpa_bind_mm(v, mm_old);
>>>> +               goto out;
>>>> +       }
>>>> +
>>>> +       if (!v->vdpa->use_va) {
>>>> +               atomic64_sub(pinned_vm, &mm_old->pinned_vm);
>>>> +               atomic64_add(pinned_vm, &mm_new->pinned_vm);
>>>> +       }
>>>> +
>>>> +out:
>>>> +       mmdrop(mm_old);
>>>> +       return r;
>>>> +}
>>>> +
>>>>    static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
>>>>                                      void __user *argp)
>>>>    {
>>>> @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
>>>>           case VHOST_VDPA_RESUME:
>>>>                   r = vhost_vdpa_resume(v);
>>>>                   break;
>>>> +       case VHOST_NEW_OWNER:
>>>> +               r = vhost_vdpa_new_owner(v);
>>>> +               break;
>>>>           default:
>>>>                   r = vhost_dev_ioctl(&v->vdev, cmd, argp);
>>>>                   if (r == -ENOIOCTLCMD)
>>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>>>> index b60955682474..ab40ae50552f 100644
>>>> --- a/drivers/vhost/vhost.c
>>>> +++ b/drivers/vhost/vhost.c
>>>> @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
>>>>    }
>>>>    EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
>>>>
>>>> +/* Caller should have device mutex */
>>>> +long vhost_dev_new_owner(struct vhost_dev *dev)
>>>> +{
>>>> +       if (dev->mm == current->mm)
>>>> +               return -EBUSY;
>>>> +
>>>> +       if (!vhost_dev_has_owner(dev))
>>>> +               return -EINVAL;
>>>> +
>>>> +       vhost_detach_mm(dev);
>>>> +       vhost_attach_mm(dev);
>>>
>>> This seems to do nothing unless I miss something.
>>
>> vhost_detach mm drops dev->mm.
>> vhost_attach_mm grabs current->mm.
>>
>> - Steve
>>
> 
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Jason Wang 1 year, 4 months ago
On Thu, Jul 18, 2024 at 2:28 AM Steven Sistare
<steven.sistare@oracle.com> wrote:
>
> On 7/16/2024 1:16 AM, Jason Wang wrote:
> > On Mon, Jul 15, 2024 at 10:27 PM Steven Sistare
> > <steven.sistare@oracle.com> wrote:
> >>
> >> On 7/14/2024 10:26 PM, Jason Wang wrote:
> >>> On Fri, Jul 12, 2024 at 9:19 PM Steve Sistare <steven.sistare@oracle.com> wrote:
> >>>>
> >>>> Add an ioctl to transfer file descriptor ownership and pinned memory
> >>>> accounting from one process to another.
> >>>>
> >>>> This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
> >>>> as that would unpin all physical pages, requiring them to be repinned in
> >>>> the new process.  That would cost multiple seconds for large memories, and
> >>>> be incurred during a virtual machine's pause time during live update.
> >>>>
> >>>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> >>>> ---
> >>>>    drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
> >>>>    drivers/vhost/vhost.c      | 15 ++++++++++++++
> >>>>    drivers/vhost/vhost.h      |  1 +
> >>>>    include/uapi/linux/vhost.h | 10 ++++++++++
> >>>>    4 files changed, 67 insertions(+)
> >>>>
> >>>> diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> >>>> index b49e5831b3f0..5cf55ca4ec02 100644
> >>>> --- a/drivers/vhost/vdpa.c
> >>>> +++ b/drivers/vhost/vdpa.c
> >>>> @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
> >>>>           return ret;
> >>>>    }
> >>>>
> >>>> +static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
> >>>> +{
> >>>> +       int r;
> >>>> +       struct vhost_dev *vdev = &v->vdev;
> >>>> +       struct mm_struct *mm_old = vdev->mm;
> >>>> +       struct mm_struct *mm_new = current->mm;
> >>>> +       long pinned_vm = v->pinned_vm;
> >>>> +       unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
> >>>> +
> >>>> +       if (!mm_old)
> >>>> +               return -EINVAL;
> >>>> +       mmgrab(mm_old);
> >>>> +
> >>>> +       if (!v->vdpa->use_va &&
> >>>> +           pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
> >>>> +               r = -ENOMEM;
> >>>> +               goto out;
> >>>> +       }
> >>>
> >>> So this seems to allow an arbitrary process to execute this. Seems to be unsafe.
> >>>
> >>> I wonder if we need to add some checks here, maybe PID or other stuff
> >>> to only allow the owner process to do this.
> >>
> >> The original owner must send the file descriptor to the new owner.
> >
> > This seems not to be in the steps you put in the cover letter.
>
> It's there:
>    "The vdpa device descriptor, fd, remains open across the exec."
>
> But, I can say more about how fd visibility constitutes permission to changer
> owner in this commit message.

Yes, that would be great.

>
> >> That constitutes permission to take ownership.
> >
> > This seems like a relaxed version of the reset_owner:
> >
> > Currently, reset_owner have the following check:
>
> Not relaxed, just different.  A process cannot do anything with fd if it
> is not the owner, *except* for becoming the new owner.  Holding the fd is
> like holding a key.

I meant it kind of "defeats" the effort of VHOST_NET_RESET_OWNER ...

Thanks

>
> - Steve
>
> > /* Caller should have device mutex */
> > long vhost_dev_check_owner(struct vhost_dev *dev)
> > {
> >          /* Are you the owner? If not, I don't think you mean to do that */
> >          return dev->mm == current->mm ? 0 : -EPERM;
> > }
> > EXPORT_SYMBOL_GPL(vhost_dev_check_owner);
> >
> > It means even if the fd is passed to some other process, the reset
> > owner won't work there.
> >
> > Thanks
> >
> >>
> >>>> +       r = vhost_vdpa_bind_mm(v, mm_new);
> >>>> +       if (r)
> >>>> +               goto out;
> >>>> +
> >>>> +       r = vhost_dev_new_owner(vdev);
> >>>> +       if (r) {
> >>>> +               vhost_vdpa_bind_mm(v, mm_old);
> >>>> +               goto out;
> >>>> +       }
> >>>> +
> >>>> +       if (!v->vdpa->use_va) {
> >>>> +               atomic64_sub(pinned_vm, &mm_old->pinned_vm);
> >>>> +               atomic64_add(pinned_vm, &mm_new->pinned_vm);
> >>>> +       }
> >>>> +
> >>>> +out:
> >>>> +       mmdrop(mm_old);
> >>>> +       return r;
> >>>> +}
> >>>> +
> >>>>    static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >>>>                                      void __user *argp)
> >>>>    {
> >>>> @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
> >>>>           case VHOST_VDPA_RESUME:
> >>>>                   r = vhost_vdpa_resume(v);
> >>>>                   break;
> >>>> +       case VHOST_NEW_OWNER:
> >>>> +               r = vhost_vdpa_new_owner(v);
> >>>> +               break;
> >>>>           default:
> >>>>                   r = vhost_dev_ioctl(&v->vdev, cmd, argp);
> >>>>                   if (r == -ENOIOCTLCMD)
> >>>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> >>>> index b60955682474..ab40ae50552f 100644
> >>>> --- a/drivers/vhost/vhost.c
> >>>> +++ b/drivers/vhost/vhost.c
> >>>> @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
> >>>>    }
> >>>>    EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
> >>>>
> >>>> +/* Caller should have device mutex */
> >>>> +long vhost_dev_new_owner(struct vhost_dev *dev)
> >>>> +{
> >>>> +       if (dev->mm == current->mm)
> >>>> +               return -EBUSY;
> >>>> +
> >>>> +       if (!vhost_dev_has_owner(dev))
> >>>> +               return -EINVAL;
> >>>> +
> >>>> +       vhost_detach_mm(dev);
> >>>> +       vhost_attach_mm(dev);
> >>>
> >>> This seems to do nothing unless I miss something.
> >>
> >> vhost_detach mm drops dev->mm.
> >> vhost_attach_mm grabs current->mm.
> >>
> >> - Steve
> >>
> >
>
Re: [PATCH V2 3/7] vhost-vdpa: VHOST_NEW_OWNER
Posted by Michael S. Tsirkin 1 year, 5 months ago
On Mon, Jul 15, 2024 at 10:26:13AM +0800, Jason Wang wrote:
> On Fri, Jul 12, 2024 at 9:19 PM Steve Sistare <steven.sistare@oracle.com> wrote:
> >
> > Add an ioctl to transfer file descriptor ownership and pinned memory
> > accounting from one process to another.
> >
> > This is more efficient than VHOST_RESET_OWNER followed by VHOST_SET_OWNER,
> > as that would unpin all physical pages, requiring them to be repinned in
> > the new process.  That would cost multiple seconds for large memories, and
> > be incurred during a virtual machine's pause time during live update.
> >
> > Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
> > ---
> >  drivers/vhost/vdpa.c       | 41 ++++++++++++++++++++++++++++++++++++++
> >  drivers/vhost/vhost.c      | 15 ++++++++++++++
> >  drivers/vhost/vhost.h      |  1 +
> >  include/uapi/linux/vhost.h | 10 ++++++++++
> >  4 files changed, 67 insertions(+)
> >
> > diff --git a/drivers/vhost/vdpa.c b/drivers/vhost/vdpa.c
> > index b49e5831b3f0..5cf55ca4ec02 100644
> > --- a/drivers/vhost/vdpa.c
> > +++ b/drivers/vhost/vdpa.c
> > @@ -632,6 +632,44 @@ static long vhost_vdpa_resume(struct vhost_vdpa *v)
> >         return ret;
> >  }
> >
> > +static long vhost_vdpa_new_owner(struct vhost_vdpa *v)
> > +{
> > +       int r;
> > +       struct vhost_dev *vdev = &v->vdev;
> > +       struct mm_struct *mm_old = vdev->mm;
> > +       struct mm_struct *mm_new = current->mm;
> > +       long pinned_vm = v->pinned_vm;
> > +       unsigned long lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
> > +
> > +       if (!mm_old)
> > +               return -EINVAL;
> > +       mmgrab(mm_old);
> > +
> > +       if (!v->vdpa->use_va &&
> > +           pinned_vm + atomic64_read(&mm_new->pinned_vm) > lock_limit) {
> > +               r = -ENOMEM;
> > +               goto out;
> > +       }
> 
> So this seems to allow an arbitrary process to execute this. Seems to be unsafe.
> 
> I wonder if we need to add some checks here, maybe PID or other stuff
> to only allow the owner process to do this.

Not pid pls.


> > +       r = vhost_vdpa_bind_mm(v, mm_new);
> > +       if (r)
> > +               goto out;
> > +
> > +       r = vhost_dev_new_owner(vdev);
> > +       if (r) {
> > +               vhost_vdpa_bind_mm(v, mm_old);
> > +               goto out;
> > +       }
> > +
> > +       if (!v->vdpa->use_va) {
> > +               atomic64_sub(pinned_vm, &mm_old->pinned_vm);
> > +               atomic64_add(pinned_vm, &mm_new->pinned_vm);
> > +       }
> > +
> > +out:
> > +       mmdrop(mm_old);
> > +       return r;
> > +}
> > +
> >  static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
> >                                    void __user *argp)
> >  {
> > @@ -876,6 +914,9 @@ static long vhost_vdpa_unlocked_ioctl(struct file *filep,
> >         case VHOST_VDPA_RESUME:
> >                 r = vhost_vdpa_resume(v);
> >                 break;
> > +       case VHOST_NEW_OWNER:
> > +               r = vhost_vdpa_new_owner(v);
> > +               break;
> >         default:
> >                 r = vhost_dev_ioctl(&v->vdev, cmd, argp);
> >                 if (r == -ENOIOCTLCMD)
> > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> > index b60955682474..ab40ae50552f 100644
> > --- a/drivers/vhost/vhost.c
> > +++ b/drivers/vhost/vhost.c
> > @@ -963,6 +963,21 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
> >  }
> >  EXPORT_SYMBOL_GPL(vhost_dev_set_owner);
> >
> > +/* Caller should have device mutex */
> > +long vhost_dev_new_owner(struct vhost_dev *dev)
> > +{
> > +       if (dev->mm == current->mm)
> > +               return -EBUSY;
> > +
> > +       if (!vhost_dev_has_owner(dev))
> > +               return -EINVAL;
> > +
> > +       vhost_detach_mm(dev);
> > +       vhost_attach_mm(dev);
> 
> This seems to do nothing unless I miss something.
> 
> Thanks