[PATCH mptcp-next v14 2/8] mptcp: implement .read_sock

Geliang Tang posted 8 patches 2 months, 2 weeks ago
There is a newer version of this series
[PATCH mptcp-next v14 2/8] mptcp: implement .read_sock
Posted by Geliang Tang 2 months, 2 weeks ago
From: Geliang Tang <tanggeliang@kylinos.cn>

nvme_tcp_try_recv() needs to call the .read_sock interface of struct
proto_ops, but MPTCP does not implement it.

This patch implements it with reference to __tcp_read_sock() and
__mptcp_recvmsg_mskq().

Corresponding to tcp_recv_skb(), a new helper for MPTCP named
mptcp_recv_skb() is added to peek a skb from sk->sk_receive_queue.

Compared with __mptcp_recvmsg_mskq(), mptcp_read_sock() uses
sk->sk_rcvbuf as the max read length. The LISTEN status is checked
before the while loop, and mptcp_recv_skb() and mptcp_cleanup_rbuf()
are invoked after the loop. Inside the loop, all of the flag checks
from __mptcp_recvmsg_mskq() are removed.

Reviewed-by: Hannes Reinecke <hare@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/mptcp/protocol.c | 86 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b29790a7cea8..75f90f5fc2a3 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -4296,6 +4296,90 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 	return mask;
 }
 
+static struct sk_buff *mptcp_recv_skb(struct sock *sk, u32 *off)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct sk_buff *skb;
+	u32 offset;
+
+	if (!list_empty(&msk->backlog_list))
+		mptcp_move_skbs(sk);	/* presumably refills sk_receive_queue - confirm */
+	/* Return the first queued skb with unread bytes; eat fully-read ones. */
+	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
+		offset = MPTCP_SKB_CB(skb)->offset;
+		if (offset < skb->len) {
+			*off = offset;	/* read position inside the returned skb */
+			return skb;	/* only peeked: the skb stays on the queue */
+		}
+		mptcp_eat_recv_skb(sk, skb);
+	}
+	return NULL;	/* nothing readable is queued */
+}
+
+/*
+ * Pass queued receive data to @recv_actor, limited by sk->sk_rcvbuf per call.
+ * The caller is assumed to hold the socket lock (msk_owned_by_me() on entry).
+ */
+static int __mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+			     sk_read_actor_t recv_actor, bool noack)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	size_t len = sk->sk_rcvbuf;	/* upper bound on bytes offered per call */
+	struct sk_buff *skb;
+	int copied = 0;		/* bytes consumed so far, or the actor's first result */
+	u32 offset;
+
+	msk_owned_by_me(msk);
+
+	if (sk->sk_state == TCP_LISTEN)
+		return -ENOTCONN;
+	while ((skb = mptcp_recv_skb(sk, &offset)) != NULL) {
+		u32 data_len = skb->len - offset;	/* unread bytes in this skb */
+		int count;
+		u32 size;
+
+		size = min_t(size_t, len - copied, data_len);
+		count = recv_actor(desc, skb, offset, size);
+		if (count <= 0) {
+			if (!copied)	/* nothing consumed yet: return the actor's value */
+				copied = count;
+			break;
+		}
+
+		copied += count;
+
+		msk->bytes_consumed += count;
+		if (count < data_len) {	/* skb partly read: save progress and stop */
+			MPTCP_SKB_CB(skb)->offset += count;
+			MPTCP_SKB_CB(skb)->map_seq += count;
+			break;
+		}
+
+		mptcp_eat_recv_skb(sk, skb);
+
+		if (copied >= len)
+			break;
+	}
+
+	if (noack)	/* caller asked to skip the receive-buffer bookkeeping below */
+		goto out;
+
+	mptcp_rcv_space_adjust(msk, copied);
+
+	if (copied > 0) {
+		mptcp_recv_skb(sk, &offset);	/* result unused; called for its side effects */
+		mptcp_cleanup_rbuf(msk, copied);
+	}
+out:
+	return copied;
+}
+
+static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+			   sk_read_actor_t recv_actor)
+{
+	return __mptcp_read_sock(sk, desc, recv_actor, false);	/* noack = false */
+}
+
 static const struct proto_ops mptcp_stream_ops = {
 	.family		   = PF_INET,
 	.owner		   = THIS_MODULE,
@@ -4316,6 +4400,7 @@ static const struct proto_ops mptcp_stream_ops = {
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.set_rcvlowat	   = mptcp_set_rcvlowat,
+	.read_sock	   = mptcp_read_sock,
 };
 
 static struct inet_protosw mptcp_protosw = {
@@ -4420,6 +4505,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
 	.compat_ioctl	   = inet6_compat_ioctl,
 #endif
 	.set_rcvlowat	   = mptcp_set_rcvlowat,
+	.read_sock	   = mptcp_read_sock,
 };
 
 static struct proto mptcp_v6_prot;
-- 
2.43.0
Re: [PATCH mptcp-next v14 2/8] mptcp: implement .read_sock
Posted by Mat Martineau 2 months ago
On Mon, 24 Nov 2025, Geliang Tang wrote:

> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> nvme_tcp_try_recv() needs to call .read_sock interface of struct
> proto_ops, but it's not implemented in MPTCP.
>
> This patch implements it with reference to __tcp_read_sock() and
> __mptcp_recvmsg_mskq().
>
> Corresponding to tcp_recv_skb(), a new helper for MPTCP named
> mptcp_recv_skb() is added to peek a skb from sk->sk_receive_queue.
>
> Compared with __mptcp_recvmsg_mskq(), mptcp_read_sock() uses
> sk->sk_rcvbuf as the max read length. The LISTEN status is checked
> before the while loop, and mptcp_recv_skb() and mptcp_cleanup_rbuf()
> are invoked after the loop. In the loop, all flags checks for
> __mptcp_recvmsg_mskq() are removed.
>
> Reviewed-by: Hannes Reinecke <hare@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
> net/mptcp/protocol.c | 86 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 86 insertions(+)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index b29790a7cea8..75f90f5fc2a3 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -4296,6 +4296,90 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
> 	return mask;
> }
>
> +static struct sk_buff *mptcp_recv_skb(struct sock *sk, u32 *off)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	struct sk_buff *skb;
> +	u32 offset;
> +
> +	if (!list_empty(&msk->backlog_list))
> +		mptcp_move_skbs(sk);
> +
> +	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
> +		offset = MPTCP_SKB_CB(skb)->offset;
> +		if (offset < skb->len) {
> +			*off = offset;
> +			return skb;
> +		}
> +		mptcp_eat_recv_skb(sk, skb);
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * Note:
> + *	- It is assumed that the socket was locked by the caller.
> + */
> +static int __mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> +			     sk_read_actor_t recv_actor, bool noack)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	size_t len = sk->sk_rcvbuf;

Hi Geliang!

Why limit the maximum length here? The socket lock is held already so no 
new skbs will be added to sk_receive_queue.

- Mat


> +	struct sk_buff *skb;
> +	int copied = 0;
> +	u32 offset;
> +
> +	msk_owned_by_me(msk);
> +
> +	if (sk->sk_state == TCP_LISTEN)
> +		return -ENOTCONN;
> +	while ((skb = mptcp_recv_skb(sk, &offset)) != NULL) {
> +		u32 data_len = skb->len - offset;
> +		int count;
> +		u32 size;
> +
> +		size = min_t(size_t, len - copied, data_len);
> +		count = recv_actor(desc, skb, offset, size);
> +		if (count <= 0) {
> +			if (!copied)
> +				copied = count;
> +			break;
> +		}
> +
> +		copied += count;
> +
> +		msk->bytes_consumed += count;
> +		if (count < data_len) {
> +			MPTCP_SKB_CB(skb)->offset += count;
> +			MPTCP_SKB_CB(skb)->map_seq += count;
> +			break;
> +		}
> +
> +		mptcp_eat_recv_skb(sk, skb);
> +
> +		if (copied >= len)
> +			break;
> +	}
> +
> +	if (noack)
> +		goto out;
> +
> +	mptcp_rcv_space_adjust(msk, copied);
> +
> +	if (copied > 0) {
> +		mptcp_recv_skb(sk, &offset);
> +		mptcp_cleanup_rbuf(msk, copied);
> +	}
> +out:
> +	return copied;
> +}
> +
> +static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> +			   sk_read_actor_t recv_actor)
> +{
> +	return __mptcp_read_sock(sk, desc, recv_actor, false);
> +}
> +
> static const struct proto_ops mptcp_stream_ops = {
> 	.family		   = PF_INET,
> 	.owner		   = THIS_MODULE,
> @@ -4316,6 +4400,7 @@ static const struct proto_ops mptcp_stream_ops = {
> 	.recvmsg	   = inet_recvmsg,
> 	.mmap		   = sock_no_mmap,
> 	.set_rcvlowat	   = mptcp_set_rcvlowat,
> +	.read_sock	   = mptcp_read_sock,
> };
>
> static struct inet_protosw mptcp_protosw = {
> @@ -4420,6 +4505,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
> 	.compat_ioctl	   = inet6_compat_ioctl,
> #endif
> 	.set_rcvlowat	   = mptcp_set_rcvlowat,
> +	.read_sock	   = mptcp_read_sock,
> };
>
> static struct proto mptcp_v6_prot;
> -- 
> 2.43.0
>
>
>
Re: [PATCH mptcp-next v14 2/8] mptcp: implement .read_sock
Posted by Geliang Tang 2 months ago
Hi Mat,

Thank you for your review.

On Fri, 2025-12-05 at 16:17 -0800, Mat Martineau wrote:
> On Mon, 24 Nov 2025, Geliang Tang wrote:
> 
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> > 
> > nvme_tcp_try_recv() needs to call .read_sock interface of struct
> > proto_ops, but it's not implemented in MPTCP.
> > 
> > This patch implements it with reference to __tcp_read_sock() and
> > __mptcp_recvmsg_mskq().
> > 
> > Corresponding to tcp_recv_skb(), a new helper for MPTCP named
> > mptcp_recv_skb() is added to peek a skb from sk->sk_receive_queue.
> > 
> > Compared with __mptcp_recvmsg_mskq(), mptcp_read_sock() uses
> > sk->sk_rcvbuf as the max read length. The LISTEN status is checked
> > before the while loop, and mptcp_recv_skb() and
> > mptcp_cleanup_rbuf()
> > are invoked after the loop. In the loop, all flags checks for
> > __mptcp_recvmsg_mskq() are removed.
> > 
> > Reviewed-by: Hannes Reinecke <hare@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> > net/mptcp/protocol.c | 86
> > ++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 86 insertions(+)
> > 
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index b29790a7cea8..75f90f5fc2a3 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -4296,6 +4296,90 @@ static __poll_t mptcp_poll(struct file
> > *file, struct socket *sock,
> > 	return mask;
> > }
> > 
> > +static struct sk_buff *mptcp_recv_skb(struct sock *sk, u32 *off)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	struct sk_buff *skb;
> > +	u32 offset;
> > +
> > +	if (!list_empty(&msk->backlog_list))
> > +		mptcp_move_skbs(sk);
> > +
> > +	while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
> > +		offset = MPTCP_SKB_CB(skb)->offset;
> > +		if (offset < skb->len) {
> > +			*off = offset;
> > +			return skb;
> > +		}
> > +		mptcp_eat_recv_skb(sk, skb);
> > +	}
> > +	return NULL;
> > +}
> > +
> > +/*
> > + * Note:
> > + *	- It is assumed that the socket was locked by the caller.
> > + */
> > +static int __mptcp_read_sock(struct sock *sk, read_descriptor_t
> > *desc,
> > +			     sk_read_actor_t recv_actor, bool
> > noack)
> > +{
> > +	struct mptcp_sock *msk = mptcp_sk(sk);
> > +	size_t len = sk->sk_rcvbuf;
> 
> Hi Geliang!
> 
> Why limit the maximum length here? The socket lock is held already so
> no 
> new skbs will be added to sk_receive_queue.

This is indeed a point where I'm not entirely certain. In v1 and v2
[1], I did set this len to INT_MAX, meaning there is no restriction on
the maximum length.

I changed it back to INT_MAX in v15 and, as you suggested, also dropped
the tcp_recv_should_stop() helper patches (patches 3 and 4 in v14)
from this series.

With these changes, everything is working fine, and all tests pass.
Please review v15 for me.

Thanks,
-Geliang

[1]
https://patchwork.kernel.org/project/mptcp/patch/32cc946892d634a14cdf4373c1b9d47126162e5a.1749286212.git.tanggeliang@kylinos.cn/

> 
> - Mat
> 
> 
> > +	struct sk_buff *skb;
> > +	int copied = 0;
> > +	u32 offset;
> > +
> > +	msk_owned_by_me(msk);
> > +
> > +	if (sk->sk_state == TCP_LISTEN)
> > +		return -ENOTCONN;
> > +	while ((skb = mptcp_recv_skb(sk, &offset)) != NULL) {
> > +		u32 data_len = skb->len - offset;
> > +		int count;
> > +		u32 size;
> > +
> > +		size = min_t(size_t, len - copied, data_len);
> > +		count = recv_actor(desc, skb, offset, size);
> > +		if (count <= 0) {
> > +			if (!copied)
> > +				copied = count;
> > +			break;
> > +		}
> > +
> > +		copied += count;
> > +
> > +		msk->bytes_consumed += count;
> > +		if (count < data_len) {
> > +			MPTCP_SKB_CB(skb)->offset += count;
> > +			MPTCP_SKB_CB(skb)->map_seq += count;
> > +			break;
> > +		}
> > +
> > +		mptcp_eat_recv_skb(sk, skb);
> > +
> > +		if (copied >= len)
> > +			break;
> > +	}
> > +
> > +	if (noack)
> > +		goto out;
> > +
> > +	mptcp_rcv_space_adjust(msk, copied);
> > +
> > +	if (copied > 0) {
> > +		mptcp_recv_skb(sk, &offset);
> > +		mptcp_cleanup_rbuf(msk, copied);
> > +	}
> > +out:
> > +	return copied;
> > +}
> > +
> > +static int mptcp_read_sock(struct sock *sk, read_descriptor_t
> > *desc,
> > +			   sk_read_actor_t recv_actor)
> > +{
> > +	return __mptcp_read_sock(sk, desc, recv_actor, false);
> > +}
> > +
> > static const struct proto_ops mptcp_stream_ops = {
> > 	.family		   = PF_INET,
> > 	.owner		   = THIS_MODULE,
> > @@ -4316,6 +4400,7 @@ static const struct proto_ops
> > mptcp_stream_ops = {
> > 	.recvmsg	   = inet_recvmsg,
> > 	.mmap		   = sock_no_mmap,
> > 	.set_rcvlowat	   = mptcp_set_rcvlowat,
> > +	.read_sock	   = mptcp_read_sock,
> > };
> > 
> > static struct inet_protosw mptcp_protosw = {
> > @@ -4420,6 +4505,7 @@ static const struct proto_ops
> > mptcp_v6_stream_ops = {
> > 	.compat_ioctl	   = inet6_compat_ioctl,
> > #endif
> > 	.set_rcvlowat	   = mptcp_set_rcvlowat,
> > +	.read_sock	   = mptcp_read_sock,
> > };
> > 
> > static struct proto mptcp_v6_prot;
> > -- 
> > 2.43.0
> > 
> > 
> >