From: Geliang Tang <tanggeliang@kylinos.cn>
nvme_tcp_try_recv() needs to call .read_sock interface of struct
proto_ops, but it is not implemented in MPTCP.
This patch implements it with reference to __mptcp_recvmsg_mskq().
v2:
- first check the sk_state (Matt), but not look for the end of the
end of a connection like TCP in __tcp_read_sock():
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
break;
This will cause a use-after-free error:
BUG: KASAN: slab-use-after-free in mptcp_read_sock.
v3:
- Use sk->sk_rcvbuf instead of INT_MAX as the max len.
v4:
- invoke __mptcp_move_skbs.
Reviewed-by: Hannes Reinecke <hare@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
net/mptcp/protocol.c | 64 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 64 insertions(+)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2f747ab730e5..10cc8756877e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3956,6 +3956,68 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
return mask;
}
+/*
+ * Note:
+ * - It is assumed that the socket was locked by the caller.
+ */
+static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct scm_timestamping_internal tss;
+ size_t len = sk->sk_rcvbuf;
+ struct sk_buff *skb, *tmp;
+ int copied = 0;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return -ENOTCONN;
+ skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
+ u32 offset = MPTCP_SKB_CB(skb)->offset;
+ u32 data_len = skb->len - offset;
+ u32 size = min_t(size_t, len - copied, data_len);
+ int count;
+
+ count = recv_actor(desc, skb, offset, size);
+ if (count <= 0) {
+ if (!copied)
+ copied = count;
+ break;
+ }
+
+ if (MPTCP_SKB_CB(skb)->has_rxtstamp)
+ tcp_update_recv_tstamps(skb, &tss);
+
+ copied += count;
+
+ if (count < data_len) {
+ MPTCP_SKB_CB(skb)->offset += count;
+ MPTCP_SKB_CB(skb)->map_seq += count;
+ msk->bytes_consumed += count;
+ break;
+ }
+
+ /* avoid the indirect call, we know the destructor is sock_wfree */
+ skb->destructor = NULL;
+ atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+ sk_mem_uncharge(sk, skb->truesize);
+ sk_eat_skb(sk, skb);
+ msk->bytes_consumed += count;
+
+ if (copied >= len)
+ break;
+ }
+
+ mptcp_rcv_space_adjust(msk, copied);
+
+ if (copied > 0) {
+ if (skb_queue_empty(&sk->sk_receive_queue))
+ __mptcp_move_skbs(sk);
+ mptcp_cleanup_rbuf(msk, copied);
+ }
+
+ return copied;
+}
+
static const struct proto_ops mptcp_stream_ops = {
.family = PF_INET,
.owner = THIS_MODULE,
@@ -3976,6 +4038,7 @@ static const struct proto_ops mptcp_stream_ops = {
.recvmsg = inet_recvmsg,
.mmap = sock_no_mmap,
.set_rcvlowat = mptcp_set_rcvlowat,
+ .read_sock = mptcp_read_sock,
};
static struct inet_protosw mptcp_protosw = {
@@ -4080,6 +4143,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.compat_ioctl = inet6_compat_ioctl,
#endif
.set_rcvlowat = mptcp_set_rcvlowat,
+ .read_sock = mptcp_read_sock,
};
static struct proto mptcp_v6_prot;
--
2.48.1
On 6/27/25 5:59 PM, Geliang Tang wrote: > From: Geliang Tang <tanggeliang@kylinos.cn> > > nvme_tcp_try_recv() needs to call .read_sock interface of struct > proto_ops, but it is not implemented in MPTCP. > > This patch implements it with reference to __mptcp_recvmsg_mskq(). > > v2: > - first check the sk_state (Matt), but not look for the end of the > end of a connection like TCP in __tcp_read_sock(): > > if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) > break; > > This will cause a use-after-free error: > > BUG: KASAN: slab-use-after-free in mptcp_read_sock. > > v3: > - Use sk->sk_rcvbuf instead of INT_MAX as the max len. > > v4: > - invoke __mptcp_move_skbs. > > Reviewed-by: Hannes Reinecke <hare@kernel.org> > Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn> > --- > net/mptcp/protocol.c | 64 ++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 64 insertions(+) > > diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c > index 2f747ab730e5..10cc8756877e 100644 > --- a/net/mptcp/protocol.c > +++ b/net/mptcp/protocol.c > @@ -3956,6 +3956,68 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, > return mask; > } > > +/* > + * Note: > + * - It is assumed that the socket was locked by the caller. > + */ > +static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc, > + sk_read_actor_t recv_actor) > +{ > + struct mptcp_sock *msk = mptcp_sk(sk); > + struct scm_timestamping_internal tss; > + size_t len = sk->sk_rcvbuf; > + struct sk_buff *skb, *tmp; > + int copied = 0; > + > + if (sk->sk_state == TCP_LISTEN) > + return -ENOTCONN; > + skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) { > + u32 offset = MPTCP_SKB_CB(skb)->offset; > + u32 data_len = skb->len - offset; > + u32 size = min_t(size_t, len - copied, data_len); > + int count; > + > + count = recv_actor(desc, skb, offset, size); > + if (count <= 0) { > + if (!copied) > + copied = count; > + break; > + } > + > + if (MPTCP_SKB_CB(skb)->has_rxtstamp) > + tcp_update_recv_tstamps(skb, &tss); Looking at tcp_read_sock() as the reference, I'm wondering why the above is needed? likely at least a comment is deserved - or drop the statement. > + > + copied += count; > + > + if (count < data_len) { > + MPTCP_SKB_CB(skb)->offset += count; > + MPTCP_SKB_CB(skb)->map_seq += count; > + msk->bytes_consumed += count; > + break; > + } > + > + /* avoid the indirect call, we know the destructor is sock_wfree */ > + skb->destructor = NULL; > + atomic_sub(skb->truesize, &sk->sk_rmem_alloc); > + sk_mem_uncharge(sk, skb->truesize); > + sk_eat_skb(sk, skb); > + msk->bytes_consumed += count; > + > + if (copied >= len) > + break; > + } > + > + mptcp_rcv_space_adjust(msk, copied); > + > + if (copied > 0) { > + if (skb_queue_empty(&sk->sk_receive_queue)) > + __mptcp_move_skbs(sk); This is needed even in the main loop. It's probably better to code a mptcp_recv_skb() that will take care of that (and of offset adjust), and use such helper here and in the main loop. /P
© 2016 - 2025 Red Hat, Inc.