From: Geliang Tang <tanggeliang@kylinos.cn>
nvme_tcp_try_recv() needs to call .read_sock interface of struct
proto_ops, but it's not implemented in MPTCP.
This patch implements it with reference to __tcp_read_sock() and
__mptcp_recvmsg_mskq().
Corresponding to tcp_recv_skb(), a new helper for MPTCP named
mptcp_recv_skb() is added to peek a skb from sk->sk_receive_queue.
Compared with __mptcp_recvmsg_mskq(), mptcp_read_sock() uses
sk->sk_rcvbuf as the max read length. The LISTEN status is checked
before the while loop, and mptcp_recv_skb() and mptcp_cleanup_rbuf()
are invoked after the loop. In the loop, all flags checks for
__mptcp_recvmsg_mskq() are removed.
Reviewed-by: Hannes Reinecke <hare@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/protocol.c | 86 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 86 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index b29790a7cea8..75f90f5fc2a3 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -4296,6 +4296,90 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
return mask;
}
+static struct sk_buff *mptcp_recv_skb(struct sock *sk, u32 *off)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct sk_buff *skb;
+ u32 offset;
+
+ if (!list_empty(&msk->backlog_list))
+ mptcp_move_skbs(sk);
+
+ while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
+ offset = MPTCP_SKB_CB(skb)->offset;
+ if (offset < skb->len) {
+ *off = offset;
+ return skb;
+ }
+ mptcp_eat_recv_skb(sk, skb);
+ }
+ return NULL;
+}
+
+/*
+ * Note:
+ * - It is assumed that the socket was locked by the caller.
+ */
+static int __mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ size_t len = sk->sk_rcvbuf;
+ struct sk_buff *skb;
+ int copied = 0;
+ u32 offset;
+
+ msk_owned_by_me(msk);
+
+ if (sk->sk_state == TCP_LISTEN)
+ return -ENOTCONN;
+ while ((skb = mptcp_recv_skb(sk, &offset)) != NULL) {
+ u32 data_len = skb->len - offset;
+ int count;
+ u32 size;
+
+ size = min_t(size_t, len - copied, data_len);
+ count = recv_actor(desc, skb, offset, size);
+ if (count <= 0) {
+ if (!copied)
+ copied = count;
+ break;
+ }
+
+ copied += count;
+
+ msk->bytes_consumed += count;
+ if (count < data_len) {
+ MPTCP_SKB_CB(skb)->offset += count;
+ MPTCP_SKB_CB(skb)->map_seq += count;
+ break;
+ }
+
+ mptcp_eat_recv_skb(sk, skb);
+
+ if (copied >= len)
+ break;
+ }
+
+ if (noack)
+ goto out;
+
+ mptcp_rcv_space_adjust(msk, copied);
+
+ if (copied > 0) {
+ mptcp_recv_skb(sk, &offset);
+ mptcp_cleanup_rbuf(msk, copied);
+ }
+out:
+ return copied;
+}
+
+static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ return __mptcp_read_sock(sk, desc, recv_actor, false);
+}
+
static const struct proto_ops mptcp_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
@@ -4316,6 +4400,7 @@ static const struct proto_ops mptcp_stream_ops = {
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.set_rcvlowat = mptcp_set_rcvlowat,
+ .read_sock = mptcp_read_sock,
};
static struct inet_protosw mptcp_protosw = {
@@ -4420,6 +4505,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.compat_ioctl = inet6_compat_ioctl,
#endif
.set_rcvlowat = mptcp_set_rcvlowat,
+ .read_sock = mptcp_read_sock,
};
static struct proto mptcp_v6_prot;
--
2.43.0
On Mon, 24 Nov 2025, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
>
> nvme_tcp_try_recv() needs to call .read_sock interface of struct
> proto_ops, but it's not implemented in MPTCP.
>
> This patch implements it with reference to __tcp_read_sock() and
> __mptcp_recvmsg_mskq().
>
> Corresponding to tcp_recv_skb(), a new helper for MPTCP named
> mptcp_recv_skb() is added to peek a skb from sk->sk_receive_queue.
>
> Compared with __mptcp_recvmsg_mskq(), mptcp_read_sock() uses
> sk->sk_rcvbuf as the max read length. The LISTEN status is checked
> before the while loop, and mptcp_recv_skb() and mptcp_cleanup_rbuf()
> are invoked after the loop. In the loop, all flags checks for
> __mptcp_recvmsg_mskq() are removed.
>
> Reviewed-by: Hannes Reinecke <hare@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
> net/mptcp/protocol.c | 86 ++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 86 insertions(+)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index b29790a7cea8..75f90f5fc2a3 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -4296,6 +4296,90 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
> return mask;
> }
>
> +static struct sk_buff *mptcp_recv_skb(struct sock *sk, u32 *off)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> + struct sk_buff *skb;
> + u32 offset;
> +
> + if (!list_empty(&msk->backlog_list))
> + mptcp_move_skbs(sk);
> +
> + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
> + offset = MPTCP_SKB_CB(skb)->offset;
> + if (offset < skb->len) {
> + *off = offset;
> + return skb;
> + }
> + mptcp_eat_recv_skb(sk, skb);
> + }
> + return NULL;
> +}
> +
> +/*
> + * Note:
> + * - It is assumed that the socket was locked by the caller.
> + */
> +static int __mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> + sk_read_actor_t recv_actor, bool noack)
> +{
> + struct mptcp_sock *msk = mptcp_sk(sk);
> + size_t len = sk->sk_rcvbuf;
Hi Geliang!
Why limit the maximum length here? The socket lock is held already so no
new skbs will be added to sk_receive_queue.
- Mat
> + struct sk_buff *skb;
> + int copied = 0;
> + u32 offset;
> +
> + msk_owned_by_me(msk);
> +
> + if (sk->sk_state == TCP_LISTEN)
> + return -ENOTCONN;
> + while ((skb = mptcp_recv_skb(sk, &offset)) != NULL) {
> + u32 data_len = skb->len - offset;
> + int count;
> + u32 size;
> +
> + size = min_t(size_t, len - copied, data_len);
> + count = recv_actor(desc, skb, offset, size);
> + if (count <= 0) {
> + if (!copied)
> + copied = count;
> + break;
> + }
> +
> + copied += count;
> +
> + msk->bytes_consumed += count;
> + if (count < data_len) {
> + MPTCP_SKB_CB(skb)->offset += count;
> + MPTCP_SKB_CB(skb)->map_seq += count;
> + break;
> + }
> +
> + mptcp_eat_recv_skb(sk, skb);
> +
> + if (copied >= len)
> + break;
> + }
> +
> + if (noack)
> + goto out;
> +
> + mptcp_rcv_space_adjust(msk, copied);
> +
> + if (copied > 0) {
> + mptcp_recv_skb(sk, &offset);
> + mptcp_cleanup_rbuf(msk, copied);
> + }
> +out:
> + return copied;
> +}
> +
> +static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> + sk_read_actor_t recv_actor)
> +{
> + return __mptcp_read_sock(sk, desc, recv_actor, false);
> +}
> +
> static const struct proto_ops mptcp_stream_ops = {
> .family = PF_INET,
> .owner = THIS_MODULE,
> @@ -4316,6 +4400,7 @@ static const struct proto_ops mptcp_stream_ops = {
> .recvmsg = inet_recvmsg,
> .mmap = sock_no_mmap,
> .set_rcvlowat = mptcp_set_rcvlowat,
> + .read_sock = mptcp_read_sock,
> };
>
> static struct inet_protosw mptcp_protosw = {
> @@ -4420,6 +4505,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
> .compat_ioctl = inet6_compat_ioctl,
> #endif
> .set_rcvlowat = mptcp_set_rcvlowat,
> + .read_sock = mptcp_read_sock,
> };
>
> static struct proto mptcp_v6_prot;
> --
> 2.43.0
>
>
>
Hi Mat,
Thank you for your review.
On Fri, 2025-12-05 at 16:17 -0800, Mat Martineau wrote:
> On Mon, 24 Nov 2025, Geliang Tang wrote:
>
> > From: Geliang Tang <tanggeliang@kylinos.cn>
> >
> > nvme_tcp_try_recv() needs to call .read_sock interface of struct
> > proto_ops, but it's not implemented in MPTCP.
> >
> > This patch implements it with reference to __tcp_read_sock() and
> > __mptcp_recvmsg_mskq().
> >
> > Corresponding to tcp_recv_skb(), a new helper for MPTCP named
> > mptcp_recv_skb() is added to peek a skb from sk->sk_receive_queue.
> >
> > Compared with __mptcp_recvmsg_mskq(), mptcp_read_sock() uses
> > sk->sk_rcvbuf as the max read length. The LISTEN status is checked
> > before the while loop, and mptcp_recv_skb() and
> > mptcp_cleanup_rbuf()
> > are invoked after the loop. In the loop, all flags checks for
> > __mptcp_recvmsg_mskq() are removed.
> >
> > Reviewed-by: Hannes Reinecke <hare@kernel.org>
> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> > ---
> > net/mptcp/protocol.c | 86
> > ++++++++++++++++++++++++++++++++++++++++++++
> > 1 file changed, 86 insertions(+)
> >
> > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> > index b29790a7cea8..75f90f5fc2a3 100644
> > --- a/net/mptcp/protocol.c
> > +++ b/net/mptcp/protocol.c
> > @@ -4296,6 +4296,90 @@ static __poll_t mptcp_poll(struct file
> > *file, struct socket *sock,
> > return mask;
> > }
> >
> > +static struct sk_buff *mptcp_recv_skb(struct sock *sk, u32 *off)
> > +{
> > + struct mptcp_sock *msk = mptcp_sk(sk);
> > + struct sk_buff *skb;
> > + u32 offset;
> > +
> > + if (!list_empty(&msk->backlog_list))
> > + mptcp_move_skbs(sk);
> > +
> > + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
> > + offset = MPTCP_SKB_CB(skb)->offset;
> > + if (offset < skb->len) {
> > + *off = offset;
> > + return skb;
> > + }
> > + mptcp_eat_recv_skb(sk, skb);
> > + }
> > + return NULL;
> > +}
> > +
> > +/*
> > + * Note:
> > + * - It is assumed that the socket was locked by the caller.
> > + */
> > +static int __mptcp_read_sock(struct sock *sk, read_descriptor_t
> > *desc,
> > + sk_read_actor_t recv_actor, bool
> > noack)
> > +{
> > + struct mptcp_sock *msk = mptcp_sk(sk);
> > + size_t len = sk->sk_rcvbuf;
>
> Hi Geliang!
>
> Why limit the maximum length here? The socket lock is held already so
> no
> new skbs will be added to sk_receive_queue.
This is indeed a point I'm not entirely certain about. In v1 and v2
[1], I set this len to INT_MAX, meaning there was no restriction on
the maximum length.
I revised it to INT_MAX again in v15 and also removed the
tcp_recv_should_stop() helper-related patches (patches 3 and 4 in v14)
from this series, as you suggested.
With these changes, everything is working fine, and all tests pass.
Please review v15 for me.
Thanks,
-Geliang
[1]
https://patchwork.kernel.org/project/mptcp/patch/32cc946892d634a14cdf4373c1b9d47126162e5a.1749286212.git.tanggeliang@kylinos.cn/
>
> - Mat
>
>
> > + struct sk_buff *skb;
> > + int copied = 0;
> > + u32 offset;
> > +
> > + msk_owned_by_me(msk);
> > +
> > + if (sk->sk_state == TCP_LISTEN)
> > + return -ENOTCONN;
> > + while ((skb = mptcp_recv_skb(sk, &offset)) != NULL) {
> > + u32 data_len = skb->len - offset;
> > + int count;
> > + u32 size;
> > +
> > + size = min_t(size_t, len - copied, data_len);
> > + count = recv_actor(desc, skb, offset, size);
> > + if (count <= 0) {
> > + if (!copied)
> > + copied = count;
> > + break;
> > + }
> > +
> > + copied += count;
> > +
> > + msk->bytes_consumed += count;
> > + if (count < data_len) {
> > + MPTCP_SKB_CB(skb)->offset += count;
> > + MPTCP_SKB_CB(skb)->map_seq += count;
> > + break;
> > + }
> > +
> > + mptcp_eat_recv_skb(sk, skb);
> > +
> > + if (copied >= len)
> > + break;
> > + }
> > +
> > + if (noack)
> > + goto out;
> > +
> > + mptcp_rcv_space_adjust(msk, copied);
> > +
> > + if (copied > 0) {
> > + mptcp_recv_skb(sk, &offset);
> > + mptcp_cleanup_rbuf(msk, copied);
> > + }
> > +out:
> > + return copied;
> > +}
> > +
> > +static int mptcp_read_sock(struct sock *sk, read_descriptor_t
> > *desc,
> > + sk_read_actor_t recv_actor)
> > +{
> > + return __mptcp_read_sock(sk, desc, recv_actor, false);
> > +}
> > +
> > static const struct proto_ops mptcp_stream_ops = {
> > .family = PF_INET,
> > .owner = THIS_MODULE,
> > @@ -4316,6 +4400,7 @@ static const struct proto_ops
> > mptcp_stream_ops = {
> > .recvmsg = inet_recvmsg,
> > .mmap = sock_no_mmap,
> > .set_rcvlowat = mptcp_set_rcvlowat,
> > + .read_sock = mptcp_read_sock,
> > };
> >
> > static struct inet_protosw mptcp_protosw = {
> > @@ -4420,6 +4505,7 @@ static const struct proto_ops
> > mptcp_v6_stream_ops = {
> > .compat_ioctl = inet6_compat_ioctl,
> > #endif
> > .set_rcvlowat = mptcp_set_rcvlowat,
> > + .read_sock = mptcp_read_sock,
> > };
> >
> > static struct proto mptcp_v6_prot;
> > --
> > 2.43.0
> >
> >
> >
© 2016 - 2026 Red Hat, Inc.