Move the message to recv_list before dropping msg_lock and copying the
request to userspace, avoiding a transient unlinked state that can race
with the msg_sync timeout path. Roll back to send_list on copy failures.
Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
Reviewed-by: Xie Yongji <xieyongji@bytedance.com>
---
drivers/vdpa/vdpa_user/vduse_dev.c | 30 ++++++++++++++++++++++--------
1 file changed, 22 insertions(+), 8 deletions(-)
diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
index ae357d014564c..b6a558341c06c 100644
--- a/drivers/vdpa/vdpa_user/vduse_dev.c
+++ b/drivers/vdpa/vdpa_user/vduse_dev.c
@@ -325,6 +325,7 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
struct file *file = iocb->ki_filp;
struct vduse_dev *dev = file->private_data;
struct vduse_dev_msg *msg;
+ struct vduse_dev_request req;
int size = sizeof(struct vduse_dev_request);
ssize_t ret;
@@ -339,7 +340,7 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
ret = -EAGAIN;
if (file->f_flags & O_NONBLOCK)
- goto unlock;
+ break;
spin_unlock(&dev->msg_lock);
ret = wait_event_interruptible_exclusive(dev->waitq,
@@ -349,17 +350,30 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
spin_lock(&dev->msg_lock);
}
+ if (!msg) {
+ spin_unlock(&dev->msg_lock);
+ return ret;
+ }
+
+ memcpy(&req, &msg->req, sizeof(req));
+ /*
+ * Move @msg to recv_list before dropping msg_lock.
+ * This avoids a window where @msg is detached from any list and
+ * vduse_dev_msg_sync() timeout path may operate on an unlinked node.
+ */
+ vduse_enqueue_msg(&dev->recv_list, msg);
spin_unlock(&dev->msg_lock);
- ret = copy_to_iter(&msg->req, size, to);
- spin_lock(&dev->msg_lock);
+
+ ret = copy_to_iter(&req, size, to);
if (ret != size) {
+ spin_lock(&dev->msg_lock);
+ /* Roll back: move msg back to send_list if still pending. */
+ msg = vduse_find_msg(&dev->recv_list, req.request_id);
+ if (msg)
+ vduse_enqueue_msg(&dev->send_list, msg);
+ spin_unlock(&dev->msg_lock);
ret = -EFAULT;
- vduse_enqueue_msg(&dev->send_list, msg);
- goto unlock;
}
- vduse_enqueue_msg(&dev->recv_list, msg);
-unlock:
- spin_unlock(&dev->msg_lock);
return ret;
}
--
2.39.5
On Fri, Jan 30, 2026 at 9:15 AM Zhang Tianci
<zhangtianci.1997@bytedance.com> wrote:
>
> Move the message to recv_list before dropping msg_lock and copying the
> request to userspace, avoiding a transient unlinked state that can race
> with the msg_sync timeout path. Roll back to send_list on copy failures.
>
This is missing a Fixes: tag and a Cc: stable@vger.kernel.org. Or maybe we
should consider this a behavior change? I don't think any VDUSE
instance should rely on never again receiving a message that it once
received partially, but still...
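For illustration, here is a minimal sketch of a daemon read loop that
tolerates such re-delivery; serve() and handle_request() are
hypothetical names, and it assumes the standard uAPI where userspace
read()s a struct vduse_dev_request from the device fd:

	#include <errno.h>
	#include <unistd.h>
	#include <linux/vduse.h>

	static void handle_request(int fd, const struct vduse_dev_request *req);

	static void serve(int dev_fd)
	{
		struct vduse_dev_request req;
		ssize_t n;

		for (;;) {
			n = read(dev_fd, &req, sizeof(req));
			if (n < 0) {
				if (errno == EINTR)
					continue;
				break;	/* unrecoverable error */
			}
			if (n != (ssize_t)sizeof(req))
				continue;	/* nothing usable copied */
			/*
			 * With the rollback in this patch, a request whose
			 * copy to userspace failed may be delivered again
			 * later, so handling should be idempotent per
			 * request_id.
			 */
			handle_request(dev_fd, &req);
		}
	}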
> Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
> Reviewed-by: Xie Yongji <xieyongji@bytedance.com>
> ---
> drivers/vdpa/vdpa_user/vduse_dev.c | 30 ++++++++++++++++++++++--------
> 1 file changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index ae357d014564c..b6a558341c06c 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -325,6 +325,7 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> struct file *file = iocb->ki_filp;
> struct vduse_dev *dev = file->private_data;
> struct vduse_dev_msg *msg;
> + struct vduse_dev_request req;
> int size = sizeof(struct vduse_dev_request);
> ssize_t ret;
>
> @@ -339,7 +340,7 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
>
> ret = -EAGAIN;
> if (file->f_flags & O_NONBLOCK)
> - goto unlock;
> + break;
>
> spin_unlock(&dev->msg_lock);
> ret = wait_event_interruptible_exclusive(dev->waitq,
> @@ -349,17 +350,30 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
>
> spin_lock(&dev->msg_lock);
> }
> + if (!msg) {
> + spin_unlock(&dev->msg_lock);
> + return ret;
> + }
> +
> + memcpy(&req, &msg->req, sizeof(req));
> + /*
> + * Move @msg to recv_list before dropping msg_lock.
> + * This avoids a window where @msg is detached from any list and
> + * vduse_dev_msg_sync() timeout path may operate on an unlinked node.
But in the timeout case, msg->completed is false, so list_del is never
called, is it?
Is there even any event that can cause more than one message to sit in
either queue? Maybe we can simplify a lot of this if we can drop that
assumption.
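For reference, the timeout path in mainline reads roughly like the
sketch below (paraphrased; the code in this tree may differ), so
whether list_del() runs hinges on msg->completed at wakeup time:

	/* Paraphrased from mainline vduse_dev_msg_sync(). If the wait
	 * ended without a reply, the message is unlinked under
	 * msg_lock -- which is what raced with the old read_iter
	 * window where @msg was on neither list. */
	spin_lock(&dev->msg_lock);
	if (!msg->completed) {
		list_del(&msg->list);
		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
	}
	ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
	spin_unlock(&dev->msg_lock);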
> + */
> + vduse_enqueue_msg(&dev->recv_list, msg);
> spin_unlock(&dev->msg_lock);
> - ret = copy_to_iter(&msg->req, size, to);
> - spin_lock(&dev->msg_lock);
> +
> + ret = copy_to_iter(&req, size, to);
> if (ret != size) {
> + spin_lock(&dev->msg_lock);
> + /* Roll back: move msg back to send_list if still pending. */
> + msg = vduse_find_msg(&dev->recv_list, req.request_id);
> + if (msg)
> + vduse_enqueue_msg(&dev->send_list, msg);
> + spin_unlock(&dev->msg_lock);
> ret = -EFAULT;
> - vduse_enqueue_msg(&dev->send_list, msg);
> - goto unlock;
> }
> - vduse_enqueue_msg(&dev->recv_list, msg);
> -unlock:
> - spin_unlock(&dev->msg_lock);
>
> return ret;
> }
> --
> 2.39.5
>
Thanks for the patch! Yet something to improve:
On Fri, Jan 30, 2026 at 04:15:24PM +0800, Zhang Tianci wrote:
> Move the message to recv_list before dropping msg_lock and copying the
> request to userspace, avoiding a transient unlinked state that can race
> with the msg_sync timeout path. Roll back to send_list on copy failures.
This is not how you write commit messages, though.
Describe the problem first, then how you fix it, please.
Something like:
If the msg_sync timeout triggers after a message has been removed
from send_list and before it is added to
recv_list, then .... As a result ....
To fix, move the message ...
>
> Signed-off-by: Zhang Tianci <zhangtianci.1997@bytedance.com>
> Reviewed-by: Xie Yongji <xieyongji@bytedance.com>
> ---
> drivers/vdpa/vdpa_user/vduse_dev.c | 30 ++++++++++++++++++++++--------
> 1 file changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/vdpa/vdpa_user/vduse_dev.c b/drivers/vdpa/vdpa_user/vduse_dev.c
> index ae357d014564c..b6a558341c06c 100644
> --- a/drivers/vdpa/vdpa_user/vduse_dev.c
> +++ b/drivers/vdpa/vdpa_user/vduse_dev.c
> @@ -325,6 +325,7 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
> struct file *file = iocb->ki_filp;
> struct vduse_dev *dev = file->private_data;
> struct vduse_dev_msg *msg;
> + struct vduse_dev_request req;
> int size = sizeof(struct vduse_dev_request);
> ssize_t ret;
>
> @@ -339,7 +340,7 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
>
> ret = -EAGAIN;
> if (file->f_flags & O_NONBLOCK)
> - goto unlock;
> + break;
>
> spin_unlock(&dev->msg_lock);
> ret = wait_event_interruptible_exclusive(dev->waitq,
> @@ -349,17 +350,30 @@ static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
>
> spin_lock(&dev->msg_lock);
> }
> + if (!msg) {
> + spin_unlock(&dev->msg_lock);
> + return ret;
> + }
> +
> + memcpy(&req, &msg->req, sizeof(req));
> + /*
> + * Move @msg to recv_list before dropping msg_lock.
> + * This avoids a window where @msg is detached from any list and
> + * vduse_dev_msg_sync() timeout path may operate on an unlinked node.
> + */
When standing by itself, not as part of the patch, this
comment confuses more than it clarifies.
> + vduse_enqueue_msg(&dev->recv_list, msg);
> spin_unlock(&dev->msg_lock);
> - ret = copy_to_iter(&msg->req, size, to);
> - spin_lock(&dev->msg_lock);
> +
> + ret = copy_to_iter(&req, size, to);
> if (ret != size) {
> + spin_lock(&dev->msg_lock);
> + /* Roll back: move msg back to send_list if still pending. */
> + msg = vduse_find_msg(&dev->recv_list, req.request_id);
Looks like this always scans (almost) the whole list, since the message
was just enqueued at the tail. Maybe make a variant using
list_for_each_entry_reverse?
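A minimal sketch of such a variant (vduse_find_msg_rev is a
hypothetical name), assuming as in mainline that the helper unlinks
the entry it returns and that callers hold dev->msg_lock:

	static struct vduse_dev_msg *vduse_find_msg_rev(struct list_head *head,
							uint32_t request_id)
	{
		struct vduse_dev_msg *msg;

		/* Walk from the tail: vduse_enqueue_msg() adds there, so
		 * the message enqueued just before the failed copy is
		 * typically found on the first iteration. */
		list_for_each_entry_reverse(msg, head, list) {
			if (msg->req.request_id == request_id) {
				list_del(&msg->list);
				return msg;
			}
		}

		return NULL;
	}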
> + if (msg)
> + vduse_enqueue_msg(&dev->send_list, msg);
Why is it not a concern that the message will now sit at the tail of
send_list, reordering the messages?
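If that reordering matters, one option would be to roll back to the
head of send_list rather than through vduse_enqueue_msg(); a rough
sketch, reusing the variables of the patched function:

	spin_lock(&dev->msg_lock);
	msg = vduse_find_msg(&dev->recv_list, req.request_id);
	if (msg)
		/* head insertion, not tail: keep FIFO delivery order */
		list_add(&msg->list, &dev->send_list);
	spin_unlock(&dev->msg_lock);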
> + spin_unlock(&dev->msg_lock);
> ret = -EFAULT;
> - vduse_enqueue_msg(&dev->send_list, msg);
> - goto unlock;
> }
> - vduse_enqueue_msg(&dev->recv_list, msg);
> -unlock:
> - spin_unlock(&dev->msg_lock);
>
> return ret;
> }
> --
> 2.39.5