[PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports

Xuewei Niu posted 3 patches 7 months ago
There is a newer version of this series
[PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports
Posted by Xuewei Niu 7 months ago
The virtio_vsock_sock has a new field called bytes_unread as the return
value of the SIOCINQ ioctl.

Though the rx_bytes exists, we introduce a bytes_unread field to the
virtio_vsock_sock struct. The reason is that it will not be updated
until the skbuff is fully consumed, which causes inconsistency.

The byte_unread is increased by the length of the skbuff when skbuff is
enqueued, and it is decreased when dequeued.

Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
---
 drivers/vhost/vsock.c                   |  1 +
 include/linux/virtio_vsock.h            |  2 ++
 net/vmw_vsock/virtio_transport.c        |  1 +
 net/vmw_vsock/virtio_transport_common.c | 17 +++++++++++++++++
 net/vmw_vsock/vsock_loopback.c          |  1 +
 5 files changed, 22 insertions(+)

diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
index 802153e23073..0f20af6e5036 100644
--- a/drivers/vhost/vsock.c
+++ b/drivers/vhost/vsock.c
@@ -452,6 +452,7 @@ static struct virtio_transport vhost_transport = {
 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
 
 		.unsent_bytes             = virtio_transport_unsent_bytes,
+		.unread_bytes             = virtio_transport_unread_bytes,
 
 		.read_skb = virtio_transport_read_skb,
 	},
diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
index 0387d64e2c66..0a7bd240113a 100644
--- a/include/linux/virtio_vsock.h
+++ b/include/linux/virtio_vsock.h
@@ -142,6 +142,7 @@ struct virtio_vsock_sock {
 	u32 buf_alloc;
 	struct sk_buff_head rx_queue;
 	u32 msg_count;
+	size_t bytes_unread;
 };
 
 struct virtio_vsock_pkt_info {
@@ -195,6 +196,7 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
 u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk);
 
 ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk);
+ssize_t virtio_transport_unread_bytes(struct vsock_sock *vsk);
 
 void virtio_transport_consume_skb_sent(struct sk_buff *skb,
 				       bool consume);
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index f0e48e6911fc..917881537b63 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -585,6 +585,7 @@ static struct virtio_transport virtio_transport = {
 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
 
 		.unsent_bytes             = virtio_transport_unsent_bytes,
+		.unread_bytes             = virtio_transport_unread_bytes,
 
 		.read_skb = virtio_transport_read_skb,
 	},
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 7f7de6d88096..11eae88c60fc 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -632,6 +632,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
 	free_space = vvs->buf_alloc - fwd_cnt_delta;
 	low_rx_bytes = (vvs->rx_bytes <
 			sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));
+	vvs->bytes_unread -= total;
 
 	spin_unlock_bh(&vvs->rx_lock);
 
@@ -782,6 +783,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
 		}
 
 		virtio_transport_dec_rx_pkt(vvs, pkt_len);
+		vvs->bytes_unread -= pkt_len;
 		kfree_skb(skb);
 	}
 
@@ -1132,6 +1134,19 @@ ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
 }
 EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);
 
+ssize_t virtio_transport_unread_bytes(struct vsock_sock *vsk)
+{
+	struct virtio_vsock_sock *vvs = vsk->trans;
+	size_t ret;
+
+	spin_lock_bh(&vvs->rx_lock);
+	ret = vvs->bytes_unread;
+	spin_unlock_bh(&vvs->rx_lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(virtio_transport_unread_bytes);
+
 static int virtio_transport_reset(struct vsock_sock *vsk,
 				  struct sk_buff *skb)
 {
@@ -1365,6 +1380,8 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
 		goto out;
 	}
 
+	vvs->bytes_unread += len;
+
 	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
 		vvs->msg_count++;
 
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 6e78927a598e..13a77db2a76f 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -99,6 +99,7 @@ static struct virtio_transport loopback_transport = {
 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
 
 		.unsent_bytes             = virtio_transport_unsent_bytes,
+		.unread_bytes             = virtio_transport_unread_bytes,
 
 		.read_skb = virtio_transport_read_skb,
 	},
-- 
2.34.1
Re: [PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports
Posted by Stefano Garzarella 7 months ago
On Mon, May 19, 2025 at 03:06:48PM +0800, Xuewei Niu wrote:
>The virtio_vsock_sock has a new field called bytes_unread as the return
>value of the SIOCINQ ioctl.
>
>Though the rx_bytes exists, we introduce a bytes_unread field to the
>virtio_vsock_sock struct. The reason is that it will not be updated
>until the skbuff is fully consumed, which causes inconsistency.
>
>The byte_unread is increased by the length of the skbuff when skbuff is
>enqueued, and it is decreased when dequeued.
>
>Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
>---
> drivers/vhost/vsock.c                   |  1 +
> include/linux/virtio_vsock.h            |  2 ++
> net/vmw_vsock/virtio_transport.c        |  1 +
> net/vmw_vsock/virtio_transport_common.c | 17 +++++++++++++++++
> net/vmw_vsock/vsock_loopback.c          |  1 +
> 5 files changed, 22 insertions(+)
>
>diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>index 802153e23073..0f20af6e5036 100644
>--- a/drivers/vhost/vsock.c
>+++ b/drivers/vhost/vsock.c
>@@ -452,6 +452,7 @@ static struct virtio_transport vhost_transport = {
> 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
>
> 		.unsent_bytes             = virtio_transport_unsent_bytes,
>+		.unread_bytes             = virtio_transport_unread_bytes,
>
> 		.read_skb = virtio_transport_read_skb,
> 	},
>diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>index 0387d64e2c66..0a7bd240113a 100644
>--- a/include/linux/virtio_vsock.h
>+++ b/include/linux/virtio_vsock.h
>@@ -142,6 +142,7 @@ struct virtio_vsock_sock {
> 	u32 buf_alloc;
> 	struct sk_buff_head rx_queue;
> 	u32 msg_count;
>+	size_t bytes_unread;

Can we just use `rx_bytes` field we already have?

Thanks,
Stefano

> };
>
> struct virtio_vsock_pkt_info {
>@@ -195,6 +196,7 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
> u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk);
>
> ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk);
>+ssize_t virtio_transport_unread_bytes(struct vsock_sock *vsk);
>
> void virtio_transport_consume_skb_sent(struct sk_buff *skb,
> 				       bool consume);
>diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
>index f0e48e6911fc..917881537b63 100644
>--- a/net/vmw_vsock/virtio_transport.c
>+++ b/net/vmw_vsock/virtio_transport.c
>@@ -585,6 +585,7 @@ static struct virtio_transport virtio_transport = {
> 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
>
> 		.unsent_bytes             = virtio_transport_unsent_bytes,
>+		.unread_bytes             = virtio_transport_unread_bytes,
>
> 		.read_skb = virtio_transport_read_skb,
> 	},
>diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
>index 7f7de6d88096..11eae88c60fc 100644
>--- a/net/vmw_vsock/virtio_transport_common.c
>+++ b/net/vmw_vsock/virtio_transport_common.c
>@@ -632,6 +632,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
> 	free_space = vvs->buf_alloc - fwd_cnt_delta;
> 	low_rx_bytes = (vvs->rx_bytes <
> 			sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));
>+	vvs->bytes_unread -= total;
>
> 	spin_unlock_bh(&vvs->rx_lock);
>
>@@ -782,6 +783,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
> 		}
>
> 		virtio_transport_dec_rx_pkt(vvs, pkt_len);
>+		vvs->bytes_unread -= pkt_len;
> 		kfree_skb(skb);
> 	}
>
>@@ -1132,6 +1134,19 @@ ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
> }
> EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);
>
>+ssize_t virtio_transport_unread_bytes(struct vsock_sock *vsk)
>+{
>+	struct virtio_vsock_sock *vvs = vsk->trans;
>+	size_t ret;
>+
>+	spin_lock_bh(&vvs->rx_lock);
>+	ret = vvs->bytes_unread;
>+	spin_unlock_bh(&vvs->rx_lock);
>+
>+	return ret;
>+}
>+EXPORT_SYMBOL_GPL(virtio_transport_unread_bytes);
>+
> static int virtio_transport_reset(struct vsock_sock *vsk,
> 				  struct sk_buff *skb)
> {
>@@ -1365,6 +1380,8 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
> 		goto out;
> 	}
>
>+	vvs->bytes_unread += len;
>+
> 	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
> 		vvs->msg_count++;
>
>diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
>index 6e78927a598e..13a77db2a76f 100644
>--- a/net/vmw_vsock/vsock_loopback.c
>+++ b/net/vmw_vsock/vsock_loopback.c
>@@ -99,6 +99,7 @@ static struct virtio_transport loopback_transport = {
> 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
>
> 		.unsent_bytes             = virtio_transport_unsent_bytes,
>+		.unread_bytes             = virtio_transport_unread_bytes,
>
> 		.read_skb = virtio_transport_read_skb,
> 	},
>-- 
>2.34.1
>
Re: [PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports
Posted by Xuewei Niu 7 months ago
> On Mon, May 19, 2025 at 03:06:48PM +0800, Xuewei Niu wrote:
> >The virtio_vsock_sock has a new field called bytes_unread as the return
> >value of the SIOCINQ ioctl.
> >
> >Though the rx_bytes exists, we introduce a bytes_unread field to the
> >virtio_vsock_sock struct. The reason is that it will not be updated
> >until the skbuff is fully consumed, which causes inconsistency.
> >
> >The byte_unread is increased by the length of the skbuff when skbuff is
> >enqueued, and it is decreased when dequeued.
> >
> >Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
> >---
> > drivers/vhost/vsock.c                   |  1 +
> > include/linux/virtio_vsock.h            |  2 ++
> > net/vmw_vsock/virtio_transport.c        |  1 +
> > net/vmw_vsock/virtio_transport_common.c | 17 +++++++++++++++++
> > net/vmw_vsock/vsock_loopback.c          |  1 +
> > 5 files changed, 22 insertions(+)
> >
> >diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >index 802153e23073..0f20af6e5036 100644
> >--- a/drivers/vhost/vsock.c
> >+++ b/drivers/vhost/vsock.c
> >@@ -452,6 +452,7 @@ static struct virtio_transport vhost_transport = {
> > 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
> >
> > 		.unsent_bytes             = virtio_transport_unsent_bytes,
> >+		.unread_bytes             = virtio_transport_unread_bytes,
> >
> > 		.read_skb = virtio_transport_read_skb,
> > 	},
> >diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> >index 0387d64e2c66..0a7bd240113a 100644
> >--- a/include/linux/virtio_vsock.h
> >+++ b/include/linux/virtio_vsock.h
> >@@ -142,6 +142,7 @@ struct virtio_vsock_sock {
> > 	u32 buf_alloc;
> > 	struct sk_buff_head rx_queue;
> > 	u32 msg_count;
> >+	size_t bytes_unread;
> 
> Can we just use `rx_bytes` field we already have?
> 
> Thanks,
> Stefano

I perfer not. The `rx_bytes` won't be updated until the skbuff is fully
consumed, causing inconsistency issues. If it is acceptable to you, I'll
reuse the field instead.

Thanks,
Xuewei

> > };
> >
> > struct virtio_vsock_pkt_info {
> >@@ -195,6 +196,7 @@ s64 virtio_transport_stream_has_space(struct vsock_sock *vsk);
> > u32 virtio_transport_seqpacket_has_data(struct vsock_sock *vsk);
> >
> > ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk);
> >+ssize_t virtio_transport_unread_bytes(struct vsock_sock *vsk);
> >
> > void virtio_transport_consume_skb_sent(struct sk_buff *skb,
> > 				       bool consume);
> >diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
> >index f0e48e6911fc..917881537b63 100644
> >--- a/net/vmw_vsock/virtio_transport.c
> >+++ b/net/vmw_vsock/virtio_transport.c
> >@@ -585,6 +585,7 @@ static struct virtio_transport virtio_transport = {
> > 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
> >
> > 		.unsent_bytes             = virtio_transport_unsent_bytes,
> >+		.unread_bytes             = virtio_transport_unread_bytes,
> >
> > 		.read_skb = virtio_transport_read_skb,
> > 	},
> >diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> >index 7f7de6d88096..11eae88c60fc 100644
> >--- a/net/vmw_vsock/virtio_transport_common.c
> >+++ b/net/vmw_vsock/virtio_transport_common.c
> >@@ -632,6 +632,7 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
> > 	free_space = vvs->buf_alloc - fwd_cnt_delta;
> > 	low_rx_bytes = (vvs->rx_bytes <
> > 			sock_rcvlowat(sk_vsock(vsk), 0, INT_MAX));
> >+	vvs->bytes_unread -= total;
> >
> > 	spin_unlock_bh(&vvs->rx_lock);
> >
> >@@ -782,6 +783,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
> > 		}
> >
> > 		virtio_transport_dec_rx_pkt(vvs, pkt_len);
> >+		vvs->bytes_unread -= pkt_len;
> > 		kfree_skb(skb);
> > 	}
> >
> >@@ -1132,6 +1134,19 @@ ssize_t virtio_transport_unsent_bytes(struct vsock_sock *vsk)
> > }
> > EXPORT_SYMBOL_GPL(virtio_transport_unsent_bytes);
> >
> >+ssize_t virtio_transport_unread_bytes(struct vsock_sock *vsk)
> >+{
> >+	struct virtio_vsock_sock *vvs = vsk->trans;
> >+	size_t ret;
> >+
> >+	spin_lock_bh(&vvs->rx_lock);
> >+	ret = vvs->bytes_unread;
> >+	spin_unlock_bh(&vvs->rx_lock);
> >+
> >+	return ret;
> >+}
> >+EXPORT_SYMBOL_GPL(virtio_transport_unread_bytes);
> >+
> > static int virtio_transport_reset(struct vsock_sock *vsk,
> > 				  struct sk_buff *skb)
> > {
> >@@ -1365,6 +1380,8 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
> > 		goto out;
> > 	}
> >
> >+	vvs->bytes_unread += len;
> >+
> > 	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
> > 		vvs->msg_count++;
> >
> >diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
> >index 6e78927a598e..13a77db2a76f 100644
> >--- a/net/vmw_vsock/vsock_loopback.c
> >+++ b/net/vmw_vsock/vsock_loopback.c
> >@@ -99,6 +99,7 @@ static struct virtio_transport loopback_transport = {
> > 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
> >
> > 		.unsent_bytes             = virtio_transport_unsent_bytes,
> >+		.unread_bytes             = virtio_transport_unread_bytes,
> >
> > 		.read_skb = virtio_transport_read_skb,
> > 	},
> >-- 
> >2.34.1
> >
Re: [PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports
Posted by Stefano Garzarella 7 months ago
On Wed, May 21, 2025 at 10:06:13AM +0800, Xuewei Niu wrote:
>> On Mon, May 19, 2025 at 03:06:48PM +0800, Xuewei Niu wrote:
>> >The virtio_vsock_sock has a new field called bytes_unread as the return
>> >value of the SIOCINQ ioctl.
>> >
>> >Though the rx_bytes exists, we introduce a bytes_unread field to the
>> >virtio_vsock_sock struct. The reason is that it will not be updated
>> >until the skbuff is fully consumed, which causes inconsistency.
>> >
>> >The byte_unread is increased by the length of the skbuff when skbuff is
>> >enqueued, and it is decreased when dequeued.
>> >
>> >Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
>> >---
>> > drivers/vhost/vsock.c                   |  1 +
>> > include/linux/virtio_vsock.h            |  2 ++
>> > net/vmw_vsock/virtio_transport.c        |  1 +
>> > net/vmw_vsock/virtio_transport_common.c | 17 +++++++++++++++++
>> > net/vmw_vsock/vsock_loopback.c          |  1 +
>> > 5 files changed, 22 insertions(+)
>> >
>> >diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
>> >index 802153e23073..0f20af6e5036 100644
>> >--- a/drivers/vhost/vsock.c
>> >+++ b/drivers/vhost/vsock.c
>> >@@ -452,6 +452,7 @@ static struct virtio_transport vhost_transport = {
>> > 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
>> >
>> > 		.unsent_bytes             = virtio_transport_unsent_bytes,
>> >+		.unread_bytes             = virtio_transport_unread_bytes,
>> >
>> > 		.read_skb = virtio_transport_read_skb,
>> > 	},
>> >diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
>> >index 0387d64e2c66..0a7bd240113a 100644
>> >--- a/include/linux/virtio_vsock.h
>> >+++ b/include/linux/virtio_vsock.h
>> >@@ -142,6 +142,7 @@ struct virtio_vsock_sock {
>> > 	u32 buf_alloc;
>> > 	struct sk_buff_head rx_queue;
>> > 	u32 msg_count;
>> >+	size_t bytes_unread;
>>
>> Can we just use `rx_bytes` field we already have?
>>
>> Thanks,
>> Stefano
>
>I perfer not. The `rx_bytes` won't be updated until the skbuff is fully
>consumed, causing inconsistency issues. If it is acceptable to you, I'll
>reuse the field instead.

I think here we found a little pre-existing issue that should be related
also to what Arseniy (CCed) is trying to fix (low_rx_bytes).

We basically have 2 counters:
- rx_bytes, which we use internally to see if there are bytes to read
   and for sock_rcvlowat
- fwd_cnt, which we use instead for the credit mechanism and informing
   the other peer whether we have space or not

These are updated with virtio_transport_dec_rx_pkt() and 
virtio_transport_inc_rx_pkt()

As far as I can see, from the beginning, we call 
virtio_transport_dec_rx_pkt() only when we consume the entire packet.
This makes sense for `fwd_cnt`, because we still have occupied space in 
memory and we don't want to update the credit until we free all the 
space, but I think it makes no sense for `rx_bytes`, which is only used 
internally and should reflect the current situation of bytes to read.

So in my opinion we should fix it this way (untested):

diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 11eae88c60fc..ee70cb114328 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -449,10 +449,10 @@ static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
  }

  static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
-					u32 len)
+					u32 bytes_read, u32 bytes_dequeued)
  {
-	vvs->rx_bytes -= len;
-	vvs->fwd_cnt += len;
+	vvs->rx_bytes -= bytes_read;
+	vvs->fwd_cnt += bytes_dequeued;
  }

  void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
@@ -581,11 +581,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
  				   size_t len)
  {
  	struct virtio_vsock_sock *vvs = vsk->trans;
-	size_t bytes, total = 0;
  	struct sk_buff *skb;
  	u32 fwd_cnt_delta;
  	bool low_rx_bytes;
  	int err = -EFAULT;
+	size_t total = 0;
  	u32 free_space;

  	spin_lock_bh(&vvs->rx_lock);
@@ -597,6 +597,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
  	}

  	while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
+		size_t bytes, dequeued = 0;
+
  		skb = skb_peek(&vvs->rx_queue);

  		bytes = min_t(size_t, len - total,
@@ -620,12 +622,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
  		VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;

  		if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
-			u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
-
-			virtio_transport_dec_rx_pkt(vvs, pkt_len);
+			dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len);
  			__skb_unlink(skb, &vvs->rx_queue);
  			consume_skb(skb);
  		}
+
+		virtio_transport_dec_rx_pkt(vvs, bytes, dequeued);
  	}

  	fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
@@ -782,7 +784,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
  				msg->msg_flags |= MSG_EOR;
  		}

-		virtio_transport_dec_rx_pkt(vvs, pkt_len);
+		virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
  		vvs->bytes_unread -= pkt_len;
  		kfree_skb(skb);
  	}
@@ -1752,6 +1754,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
  	struct sock *sk = sk_vsock(vsk);
  	struct virtio_vsock_hdr *hdr;
  	struct sk_buff *skb;
+	u32 pkt_len;
  	int off = 0;
  	int err;

@@ -1769,7 +1772,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
  	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
  		vvs->msg_count--;

-	virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
+	pkt_len = le32_to_cpu(hdr->len);
+	virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
  	spin_unlock_bh(&vvs->rx_lock);

  	virtio_transport_send_credit_update(vsk);

@Arseniy WDYT?
I will test it and send a proper patch.

@Xuewei with that fixed, I think you can use `rx_bytes`, right?

Also because you missed for example `virtio_transport_read_skb()` used 
by ebpf (see commit 3543152f2d33 ("vsock: Update rx_bytes on 
read_skb()")).

Thanks,
Stefano
Re: [PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports
Posted by Xuewei Niu 7 months ago
> On Wed, May 21, 2025 at 10:06:13AM +0800, Xuewei Niu wrote:
> >> On Mon, May 19, 2025 at 03:06:48PM +0800, Xuewei Niu wrote:
> >> >The virtio_vsock_sock has a new field called bytes_unread as the return
> >> >value of the SIOCINQ ioctl.
> >> >
> >> >Though the rx_bytes exists, we introduce a bytes_unread field to the
> >> >virtio_vsock_sock struct. The reason is that it will not be updated
> >> >until the skbuff is fully consumed, which causes inconsistency.
> >> >
> >> >The byte_unread is increased by the length of the skbuff when skbuff is
> >> >enqueued, and it is decreased when dequeued.
> >> >
> >> >Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
> >> >---
> >> > drivers/vhost/vsock.c                   |  1 +
> >> > include/linux/virtio_vsock.h            |  2 ++
> >> > net/vmw_vsock/virtio_transport.c        |  1 +
> >> > net/vmw_vsock/virtio_transport_common.c | 17 +++++++++++++++++
> >> > net/vmw_vsock/vsock_loopback.c          |  1 +
> >> > 5 files changed, 22 insertions(+)
> >> >
> >> >diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >> >index 802153e23073..0f20af6e5036 100644
> >> >--- a/drivers/vhost/vsock.c
> >> >+++ b/drivers/vhost/vsock.c
> >> >@@ -452,6 +452,7 @@ static struct virtio_transport vhost_transport = {
> >> > 		.notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
> >> >
> >> > 		.unsent_bytes             = virtio_transport_unsent_bytes,
> >> >+		.unread_bytes             = virtio_transport_unread_bytes,
> >> >
> >> > 		.read_skb = virtio_transport_read_skb,
> >> > 	},
> >> >diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> >> >index 0387d64e2c66..0a7bd240113a 100644
> >> >--- a/include/linux/virtio_vsock.h
> >> >+++ b/include/linux/virtio_vsock.h
> >> >@@ -142,6 +142,7 @@ struct virtio_vsock_sock {
> >> > 	u32 buf_alloc;
> >> > 	struct sk_buff_head rx_queue;
> >> > 	u32 msg_count;
> >> >+	size_t bytes_unread;
> >>
> >> Can we just use `rx_bytes` field we already have?
> >>
> >> Thanks,
> >> Stefano
> >
> >I perfer not. The `rx_bytes` won't be updated until the skbuff is fully
> >consumed, causing inconsistency issues. If it is acceptable to you, I'll
> >reuse the field instead.
> 
> I think here we found a little pre-existing issue that should be related
> also to what Arseniy (CCed) is trying to fix (low_rx_bytes).
> 
> We basically have 2 counters:
> - rx_bytes, which we use internally to see if there are bytes to read
>    and for sock_rcvlowat
> - fwd_cnt, which we use instead for the credit mechanism and informing
>    the other peer whether we have space or not
> 
> These are updated with virtio_transport_dec_rx_pkt() and 
> virtio_transport_inc_rx_pkt()
> 
> As far as I can see, from the beginning, we call 
> virtio_transport_dec_rx_pkt() only when we consume the entire packet.
> This makes sense for `fwd_cnt`, because we still have occupied space in 
> memory and we don't want to update the credit until we free all the 
> space, but I think it makes no sense for `rx_bytes`, which is only used 
> internally and should reflect the current situation of bytes to read.
> 
> So in my opinion we should fix it this way (untested):
> 
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index 11eae88c60fc..ee70cb114328 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -449,10 +449,10 @@ static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
>   }
> 
>   static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
> -					u32 len)
> +					u32 bytes_read, u32 bytes_dequeued)
>   {
> -	vvs->rx_bytes -= len;
> -	vvs->fwd_cnt += len;
> +	vvs->rx_bytes -= bytes_read;
> +	vvs->fwd_cnt += bytes_dequeued;
>   }
> 
>   void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
> @@ -581,11 +581,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
>   				   size_t len)
>   {
>   	struct virtio_vsock_sock *vvs = vsk->trans;
> -	size_t bytes, total = 0;
>   	struct sk_buff *skb;
>   	u32 fwd_cnt_delta;
>   	bool low_rx_bytes;
>   	int err = -EFAULT;
> +	size_t total = 0;
>   	u32 free_space;
> 
>   	spin_lock_bh(&vvs->rx_lock);
> @@ -597,6 +597,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
>   	}
> 
>   	while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
> +		size_t bytes, dequeued = 0;
> +
>   		skb = skb_peek(&vvs->rx_queue);
> 
>   		bytes = min_t(size_t, len - total,
> @@ -620,12 +622,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
>   		VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
> 
>   		if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
> -			u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
> -
> -			virtio_transport_dec_rx_pkt(vvs, pkt_len);
> +			dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len);
>   			__skb_unlink(skb, &vvs->rx_queue);
>   			consume_skb(skb);
>   		}
> +
> +		virtio_transport_dec_rx_pkt(vvs, bytes, dequeued);
>   	}
> 
>   	fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
> @@ -782,7 +784,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
>   				msg->msg_flags |= MSG_EOR;
>   		}
> 
> -		virtio_transport_dec_rx_pkt(vvs, pkt_len);
> +		virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
>   		vvs->bytes_unread -= pkt_len;
>   		kfree_skb(skb);
>   	}
> @@ -1752,6 +1754,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
>   	struct sock *sk = sk_vsock(vsk);
>   	struct virtio_vsock_hdr *hdr;
>   	struct sk_buff *skb;
> +	u32 pkt_len;
>   	int off = 0;
>   	int err;
> 
> @@ -1769,7 +1772,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
>   	if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
>   		vvs->msg_count--;
> 
> -	virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
> +	pkt_len = le32_to_cpu(hdr->len);
> +	virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
>   	spin_unlock_bh(&vvs->rx_lock);
> 
>   	virtio_transport_send_credit_update(vsk);
> 
> @Arseniy WDYT?
> I will test it and send a proper patch.
> 
> @Xuewei with that fixed, I think you can use `rx_bytes`, right?

I've seen your patch, and looks good to me. This will greatly simplify the
SIOCINQ ioctl implementation. I'll rework after your patch gets merged.

Thanks,
Xuewei

> Also because you missed for example `virtio_transport_read_skb()` used 
> by ebpf (see commit 3543152f2d33 ("vsock: Update rx_bytes on 
> read_skb()")).
> 
> Thanks,
> Stefano
Re: [PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports
Posted by Stefano Garzarella 7 months ago
Forgot to CC Arseniy.

On Wed, 21 May 2025 at 10:57, Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> On Wed, May 21, 2025 at 10:06:13AM +0800, Xuewei Niu wrote:
> >> On Mon, May 19, 2025 at 03:06:48PM +0800, Xuewei Niu wrote:
> >> >The virtio_vsock_sock has a new field called bytes_unread as the return
> >> >value of the SIOCINQ ioctl.
> >> >
> >> >Though the rx_bytes exists, we introduce a bytes_unread field to the
> >> >virtio_vsock_sock struct. The reason is that it will not be updated
> >> >until the skbuff is fully consumed, which causes inconsistency.
> >> >
> >> >The byte_unread is increased by the length of the skbuff when skbuff is
> >> >enqueued, and it is decreased when dequeued.
> >> >
> >> >Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
> >> >---
> >> > drivers/vhost/vsock.c                   |  1 +
> >> > include/linux/virtio_vsock.h            |  2 ++
> >> > net/vmw_vsock/virtio_transport.c        |  1 +
> >> > net/vmw_vsock/virtio_transport_common.c | 17 +++++++++++++++++
> >> > net/vmw_vsock/vsock_loopback.c          |  1 +
> >> > 5 files changed, 22 insertions(+)
> >> >
> >> >diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> >> >index 802153e23073..0f20af6e5036 100644
> >> >--- a/drivers/vhost/vsock.c
> >> >+++ b/drivers/vhost/vsock.c
> >> >@@ -452,6 +452,7 @@ static struct virtio_transport vhost_transport = {
> >> >            .notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
> >> >
> >> >            .unsent_bytes             = virtio_transport_unsent_bytes,
> >> >+           .unread_bytes             = virtio_transport_unread_bytes,
> >> >
> >> >            .read_skb = virtio_transport_read_skb,
> >> >    },
> >> >diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> >> >index 0387d64e2c66..0a7bd240113a 100644
> >> >--- a/include/linux/virtio_vsock.h
> >> >+++ b/include/linux/virtio_vsock.h
> >> >@@ -142,6 +142,7 @@ struct virtio_vsock_sock {
> >> >    u32 buf_alloc;
> >> >    struct sk_buff_head rx_queue;
> >> >    u32 msg_count;
> >> >+   size_t bytes_unread;
> >>
> >> Can we just use `rx_bytes` field we already have?
> >>
> >> Thanks,
> >> Stefano
> >
> >I perfer not. The `rx_bytes` won't be updated until the skbuff is fully
> >consumed, causing inconsistency issues. If it is acceptable to you, I'll
> >reuse the field instead.
>
> I think here we found a little pre-existing issue that should be related
> also to what Arseniy (CCed) is trying to fix (low_rx_bytes).
>
> We basically have 2 counters:
> - rx_bytes, which we use internally to see if there are bytes to read
>    and for sock_rcvlowat
> - fwd_cnt, which we use instead for the credit mechanism and informing
>    the other peer whether we have space or not
>
> These are updated with virtio_transport_dec_rx_pkt() and
> virtio_transport_inc_rx_pkt()
>
> As far as I can see, from the beginning, we call
> virtio_transport_dec_rx_pkt() only when we consume the entire packet.
> This makes sense for `fwd_cnt`, because we still have occupied space in
> memory and we don't want to update the credit until we free all the
> space, but I think it makes no sense for `rx_bytes`, which is only used
> internally and should reflect the current situation of bytes to read.
>
> So in my opinion we should fix it this way (untested):
>
> diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> index 11eae88c60fc..ee70cb114328 100644
> --- a/net/vmw_vsock/virtio_transport_common.c
> +++ b/net/vmw_vsock/virtio_transport_common.c
> @@ -449,10 +449,10 @@ static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
>   }
>
>   static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
> -                                       u32 len)
> +                                       u32 bytes_read, u32 bytes_dequeued)
>   {
> -       vvs->rx_bytes -= len;
> -       vvs->fwd_cnt += len;
> +       vvs->rx_bytes -= bytes_read;
> +       vvs->fwd_cnt += bytes_dequeued;
>   }
>
>   void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
> @@ -581,11 +581,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
>                                    size_t len)
>   {
>         struct virtio_vsock_sock *vvs = vsk->trans;
> -       size_t bytes, total = 0;
>         struct sk_buff *skb;
>         u32 fwd_cnt_delta;
>         bool low_rx_bytes;
>         int err = -EFAULT;
> +       size_t total = 0;
>         u32 free_space;
>
>         spin_lock_bh(&vvs->rx_lock);
> @@ -597,6 +597,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
>         }
>
>         while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
> +               size_t bytes, dequeued = 0;
> +
>                 skb = skb_peek(&vvs->rx_queue);
>
>                 bytes = min_t(size_t, len - total,
> @@ -620,12 +622,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
>                 VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
>
>                 if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
> -                       u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
> -
> -                       virtio_transport_dec_rx_pkt(vvs, pkt_len);
> +                       dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len);
>                         __skb_unlink(skb, &vvs->rx_queue);
>                         consume_skb(skb);
>                 }
> +
> +               virtio_transport_dec_rx_pkt(vvs, bytes, dequeued);
>         }
>
>         fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
> @@ -782,7 +784,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
>                                 msg->msg_flags |= MSG_EOR;
>                 }
>
> -               virtio_transport_dec_rx_pkt(vvs, pkt_len);
> +               virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
>                 vvs->bytes_unread -= pkt_len;
>                 kfree_skb(skb);
>         }
> @@ -1752,6 +1754,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
>         struct sock *sk = sk_vsock(vsk);
>         struct virtio_vsock_hdr *hdr;
>         struct sk_buff *skb;
> +       u32 pkt_len;
>         int off = 0;
>         int err;
>
> @@ -1769,7 +1772,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
>         if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
>                 vvs->msg_count--;
>
> -       virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
> +       pkt_len = le32_to_cpu(hdr->len);
> +       virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
>         spin_unlock_bh(&vvs->rx_lock);
>
>         virtio_transport_send_credit_update(vsk);
>
> @Arseniy WDYT?
> I will test it and send a proper patch.
>
> @Xuewei with that fixed, I think you can use `rx_bytes`, right?
>
> Also because you missed for example `virtio_transport_read_skb()` used
> by ebpf (see commit 3543152f2d33 ("vsock: Update rx_bytes on
> read_skb()")).
>
> Thanks,
> Stefano
Re: [PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports
Posted by Stefano Garzarella 7 months ago
On Wed, 21 May 2025 at 10:58, Stefano Garzarella <sgarzare@redhat.com> wrote:
>
> Forgot to CC Arseniy.
>
> On Wed, 21 May 2025 at 10:57, Stefano Garzarella <sgarzare@redhat.com> wrote:
> >
> > On Wed, May 21, 2025 at 10:06:13AM +0800, Xuewei Niu wrote:
> > >> On Mon, May 19, 2025 at 03:06:48PM +0800, Xuewei Niu wrote:
> > >> >The virtio_vsock_sock has a new field called bytes_unread as the return
> > >> >value of the SIOCINQ ioctl.
> > >> >
> > >> >Though the rx_bytes exists, we introduce a bytes_unread field to the
> > >> >virtio_vsock_sock struct. The reason is that it will not be updated
> > >> >until the skbuff is fully consumed, which causes inconsistency.
> > >> >
> > >> >The byte_unread is increased by the length of the skbuff when skbuff is
> > >> >enqueued, and it is decreased when dequeued.
> > >> >
> > >> >Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
> > >> >---
> > >> > drivers/vhost/vsock.c                   |  1 +
> > >> > include/linux/virtio_vsock.h            |  2 ++
> > >> > net/vmw_vsock/virtio_transport.c        |  1 +
> > >> > net/vmw_vsock/virtio_transport_common.c | 17 +++++++++++++++++
> > >> > net/vmw_vsock/vsock_loopback.c          |  1 +
> > >> > 5 files changed, 22 insertions(+)
> > >> >
> > >> >diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > >> >index 802153e23073..0f20af6e5036 100644
> > >> >--- a/drivers/vhost/vsock.c
> > >> >+++ b/drivers/vhost/vsock.c
> > >> >@@ -452,6 +452,7 @@ static struct virtio_transport vhost_transport = {
> > >> >            .notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
> > >> >
> > >> >            .unsent_bytes             = virtio_transport_unsent_bytes,
> > >> >+           .unread_bytes             = virtio_transport_unread_bytes,
> > >> >
> > >> >            .read_skb = virtio_transport_read_skb,
> > >> >    },
> > >> >diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> > >> >index 0387d64e2c66..0a7bd240113a 100644
> > >> >--- a/include/linux/virtio_vsock.h
> > >> >+++ b/include/linux/virtio_vsock.h
> > >> >@@ -142,6 +142,7 @@ struct virtio_vsock_sock {
> > >> >    u32 buf_alloc;
> > >> >    struct sk_buff_head rx_queue;
> > >> >    u32 msg_count;
> > >> >+   size_t bytes_unread;
> > >>
> > >> Can we just use `rx_bytes` field we already have?
> > >>
> > >> Thanks,
> > >> Stefano
> > >
> > >I perfer not. The `rx_bytes` won't be updated until the skbuff is fully
> > >consumed, causing inconsistency issues. If it is acceptable to you, I'll
> > >reuse the field instead.
> >
> > I think here we found a little pre-existing issue that should be related
> > also to what Arseniy (CCed) is trying to fix (low_rx_bytes).
> >
> > We basically have 2 counters:
> > - rx_bytes, which we use internally to see if there are bytes to read
> >    and for sock_rcvlowat
> > - fwd_cnt, which we use instead for the credit mechanism and informing
> >    the other peer whether we have space or not
> >
> > These are updated with virtio_transport_dec_rx_pkt() and
> > virtio_transport_inc_rx_pkt()
> >
> > As far as I can see, from the beginning, we call
> > virtio_transport_dec_rx_pkt() only when we consume the entire packet.
> > This makes sense for `fwd_cnt`, because we still have occupied space in
> > memory and we don't want to update the credit until we free all the
> > space, but I think it makes no sense for `rx_bytes`, which is only used
> > internally and should reflect the current situation of bytes to read.
> >
> > So in my opinion we should fix it this way (untested):
> >
> > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> > index 11eae88c60fc..ee70cb114328 100644
> > --- a/net/vmw_vsock/virtio_transport_common.c
> > +++ b/net/vmw_vsock/virtio_transport_common.c
> > @@ -449,10 +449,10 @@ static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
> >   }
> >
> >   static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
> > -                                       u32 len)
> > +                                       u32 bytes_read, u32 bytes_dequeued)
> >   {
> > -       vvs->rx_bytes -= len;
> > -       vvs->fwd_cnt += len;
> > +       vvs->rx_bytes -= bytes_read;
> > +       vvs->fwd_cnt += bytes_dequeued;
> >   }
> >
> >   void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
> > @@ -581,11 +581,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
> >                                    size_t len)
> >   {
> >         struct virtio_vsock_sock *vvs = vsk->trans;
> > -       size_t bytes, total = 0;
> >         struct sk_buff *skb;
> >         u32 fwd_cnt_delta;
> >         bool low_rx_bytes;
> >         int err = -EFAULT;
> > +       size_t total = 0;
> >         u32 free_space;
> >
> >         spin_lock_bh(&vvs->rx_lock);
> > @@ -597,6 +597,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
> >         }
> >
> >         while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
> > +               size_t bytes, dequeued = 0;
> > +
> >                 skb = skb_peek(&vvs->rx_queue);
> >
> >                 bytes = min_t(size_t, len - total,
> > @@ -620,12 +622,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
> >                 VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
> >
> >                 if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
> > -                       u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
> > -
> > -                       virtio_transport_dec_rx_pkt(vvs, pkt_len);
> > +                       dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len);
> >                         __skb_unlink(skb, &vvs->rx_queue);
> >                         consume_skb(skb);
> >                 }
> > +
> > +               virtio_transport_dec_rx_pkt(vvs, bytes, dequeued);
> >         }
> >
> >         fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
> > @@ -782,7 +784,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
> >                                 msg->msg_flags |= MSG_EOR;
> >                 }
> >
> > -               virtio_transport_dec_rx_pkt(vvs, pkt_len);
> > +               virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
> >                 vvs->bytes_unread -= pkt_len;
> >                 kfree_skb(skb);
> >         }
> > @@ -1752,6 +1754,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
> >         struct sock *sk = sk_vsock(vsk);
> >         struct virtio_vsock_hdr *hdr;
> >         struct sk_buff *skb;
> > +       u32 pkt_len;
> >         int off = 0;
> >         int err;
> >
> > @@ -1769,7 +1772,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
> >         if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
> >                 vvs->msg_count--;
> >
> > -       virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
> > +       pkt_len = le32_to_cpu(hdr->len);
> > +       virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
> >         spin_unlock_bh(&vvs->rx_lock);
> >
> >         virtio_transport_send_credit_update(vsk);
> >
> > @Arseniy WDYT?
> > I will test it and send a proper patch.
> >
> > @Xuewei with that fixed, I think you can use `rx_bytes`, right?

If it's true, can we just use `vsock_stream_has_data()` return value
instead of adding a new transport's callback?

Thanks,
Stefano

> >
> > Also because you missed for example `virtio_transport_read_skb()` used
> > by ebpf (see commit 3543152f2d33 ("vsock: Update rx_bytes on
> > read_skb()")).
> >
> > Thanks,
> > Stefano
Re: [PATCH 2/3] vsock/virtio: Add SIOCINQ support for all virtio based transports
Posted by Xuewei Niu 7 months ago
> On Wed, 21 May 2025 at 10:58, Stefano Garzarella <sgarzare@redhat.com> wrote:
> >
> > Forgot to CC Arseniy.
> >
> > On Wed, 21 May 2025 at 10:57, Stefano Garzarella <sgarzare@redhat.com> wrote:
> > >
> > > On Wed, May 21, 2025 at 10:06:13AM +0800, Xuewei Niu wrote:
> > > >> On Mon, May 19, 2025 at 03:06:48PM +0800, Xuewei Niu wrote:
> > > >> >The virtio_vsock_sock has a new field called bytes_unread as the return
> > > >> >value of the SIOCINQ ioctl.
> > > >> >
> > > >> >Though the rx_bytes exists, we introduce a bytes_unread field to the
> > > >> >virtio_vsock_sock struct. The reason is that it will not be updated
> > > >> >until the skbuff is fully consumed, which causes inconsistency.
> > > >> >
> > > >> >The byte_unread is increased by the length of the skbuff when skbuff is
> > > >> >enqueued, and it is decreased when dequeued.
> > > >> >
> > > >> >Signed-off-by: Xuewei Niu <niuxuewei.nxw@antgroup.com>
> > > >> >---
> > > >> > drivers/vhost/vsock.c                   |  1 +
> > > >> > include/linux/virtio_vsock.h            |  2 ++
> > > >> > net/vmw_vsock/virtio_transport.c        |  1 +
> > > >> > net/vmw_vsock/virtio_transport_common.c | 17 +++++++++++++++++
> > > >> > net/vmw_vsock/vsock_loopback.c          |  1 +
> > > >> > 5 files changed, 22 insertions(+)
> > > >> >
> > > >> >diff --git a/drivers/vhost/vsock.c b/drivers/vhost/vsock.c
> > > >> >index 802153e23073..0f20af6e5036 100644
> > > >> >--- a/drivers/vhost/vsock.c
> > > >> >+++ b/drivers/vhost/vsock.c
> > > >> >@@ -452,6 +452,7 @@ static struct virtio_transport vhost_transport = {
> > > >> >            .notify_set_rcvlowat      = virtio_transport_notify_set_rcvlowat,
> > > >> >
> > > >> >            .unsent_bytes             = virtio_transport_unsent_bytes,
> > > >> >+           .unread_bytes             = virtio_transport_unread_bytes,
> > > >> >
> > > >> >            .read_skb = virtio_transport_read_skb,
> > > >> >    },
> > > >> >diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h
> > > >> >index 0387d64e2c66..0a7bd240113a 100644
> > > >> >--- a/include/linux/virtio_vsock.h
> > > >> >+++ b/include/linux/virtio_vsock.h
> > > >> >@@ -142,6 +142,7 @@ struct virtio_vsock_sock {
> > > >> >    u32 buf_alloc;
> > > >> >    struct sk_buff_head rx_queue;
> > > >> >    u32 msg_count;
> > > >> >+   size_t bytes_unread;
> > > >>
> > > >> Can we just use `rx_bytes` field we already have?
> > > >>
> > > >> Thanks,
> > > >> Stefano
> > > >
> > > >I perfer not. The `rx_bytes` won't be updated until the skbuff is fully
> > > >consumed, causing inconsistency issues. If it is acceptable to you, I'll
> > > >reuse the field instead.
> > >
> > > I think here we found a little pre-existing issue that should be related
> > > also to what Arseniy (CCed) is trying to fix (low_rx_bytes).
> > >
> > > We basically have 2 counters:
> > > - rx_bytes, which we use internally to see if there are bytes to read
> > >    and for sock_rcvlowat
> > > - fwd_cnt, which we use instead for the credit mechanism and informing
> > >    the other peer whether we have space or not
> > >
> > > These are updated with virtio_transport_dec_rx_pkt() and
> > > virtio_transport_inc_rx_pkt()
> > >
> > > As far as I can see, from the beginning, we call
> > > virtio_transport_dec_rx_pkt() only when we consume the entire packet.
> > > This makes sense for `fwd_cnt`, because we still have occupied space in
> > > memory and we don't want to update the credit until we free all the
> > > space, but I think it makes no sense for `rx_bytes`, which is only used
> > > internally and should reflect the current situation of bytes to read.
> > >
> > > So in my opinion we should fix it this way (untested):
> > >
> > > diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
> > > index 11eae88c60fc..ee70cb114328 100644
> > > --- a/net/vmw_vsock/virtio_transport_common.c
> > > +++ b/net/vmw_vsock/virtio_transport_common.c
> > > @@ -449,10 +449,10 @@ static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
> > >   }
> > >
> > >   static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
> > > -                                       u32 len)
> > > +                                       u32 bytes_read, u32 bytes_dequeued)
> > >   {
> > > -       vvs->rx_bytes -= len;
> > > -       vvs->fwd_cnt += len;
> > > +       vvs->rx_bytes -= bytes_read;
> > > +       vvs->fwd_cnt += bytes_dequeued;
> > >   }
> > >
> > >   void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
> > > @@ -581,11 +581,11 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
> > >                                    size_t len)
> > >   {
> > >         struct virtio_vsock_sock *vvs = vsk->trans;
> > > -       size_t bytes, total = 0;
> > >         struct sk_buff *skb;
> > >         u32 fwd_cnt_delta;
> > >         bool low_rx_bytes;
> > >         int err = -EFAULT;
> > > +       size_t total = 0;
> > >         u32 free_space;
> > >
> > >         spin_lock_bh(&vvs->rx_lock);
> > > @@ -597,6 +597,8 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
> > >         }
> > >
> > >         while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
> > > +               size_t bytes, dequeued = 0;
> > > +
> > >                 skb = skb_peek(&vvs->rx_queue);
> > >
> > >                 bytes = min_t(size_t, len - total,
> > > @@ -620,12 +622,12 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
> > >                 VIRTIO_VSOCK_SKB_CB(skb)->offset += bytes;
> > >
> > >                 if (skb->len == VIRTIO_VSOCK_SKB_CB(skb)->offset) {
> > > -                       u32 pkt_len = le32_to_cpu(virtio_vsock_hdr(skb)->len);
> > > -
> > > -                       virtio_transport_dec_rx_pkt(vvs, pkt_len);
> > > +                       dequeued = le32_to_cpu(virtio_vsock_hdr(skb)->len);
> > >                         __skb_unlink(skb, &vvs->rx_queue);
> > >                         consume_skb(skb);
> > >                 }
> > > +
> > > +               virtio_transport_dec_rx_pkt(vvs, bytes, dequeued);
> > >         }
> > >
> > >         fwd_cnt_delta = vvs->fwd_cnt - vvs->last_fwd_cnt;
> > > @@ -782,7 +784,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
> > >                                 msg->msg_flags |= MSG_EOR;
> > >                 }
> > >
> > > -               virtio_transport_dec_rx_pkt(vvs, pkt_len);
> > > +               virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
> > >                 vvs->bytes_unread -= pkt_len;
> > >                 kfree_skb(skb);
> > >         }
> > > @@ -1752,6 +1754,7 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
> > >         struct sock *sk = sk_vsock(vsk);
> > >         struct virtio_vsock_hdr *hdr;
> > >         struct sk_buff *skb;
> > > +       u32 pkt_len;
> > >         int off = 0;
> > >         int err;
> > >
> > > @@ -1769,7 +1772,8 @@ int virtio_transport_read_skb(struct vsock_sock *vsk, skb_read_actor_t recv_acto
> > >         if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
> > >                 vvs->msg_count--;
> > >
> > > -       virtio_transport_dec_rx_pkt(vvs, le32_to_cpu(hdr->len));
> > > +       pkt_len = le32_to_cpu(hdr->len);
> > > +       virtio_transport_dec_rx_pkt(vvs, pkt_len, pkt_len);
> > >         spin_unlock_bh(&vvs->rx_lock);
> > >
> > >         virtio_transport_send_credit_update(vsk);
> > >
> > > @Arseniy WDYT?
> > > I will test it and send a proper patch.
> > >
> > > @Xuewei with that fixed, I think you can use `rx_bytes`, right?
> 
> If it's true, can we just use `vsock_stream_has_data()` return value
> instead of adding a new transport's callback?
> 
> Thanks,
> Stefano

Nice catch! Will do.

Thanks,
Xuewei

> > >
> > > Also because you missed for example `virtio_transport_read_skb()` used
> > > by ebpf (see commit 3543152f2d33 ("vsock: Update rx_bytes on
> > > read_skb()")).
> > >
> > > Thanks,
> > > Stefano