[PATCH mptcp-next v4 2/4] mptcp: implement .read_sock

Geliang Tang posted 4 patches 2 months, 3 weeks ago
There is a newer version of this series
[PATCH mptcp-next v4 2/4] mptcp: implement .read_sock
Posted by Geliang Tang 2 months, 3 weeks ago
From: Geliang Tang <tanggeliang@kylinos.cn>

nvme_tcp_try_recv() needs to call .read_sock interface of struct
proto_ops, but it is not implemented in MPTCP.

This patch implements it with reference to __mptcp_recvmsg_mskq().

v2:
 - first check the sk_state (Matt), but not look for the end of the
end of a connection like TCP in __tcp_read_sock():

	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
		break;

This will cause a use-after-free error:

	BUG: KASAN: slab-use-after-free in mptcp_read_sock.

v3:
 - Use sk->sk_rcvbuf instead of INT_MAX as the max len.

v4:
 - invoke __mptcp_move_skbs.

Reviewed-by: Hannes Reinecke <hare@kernel.org>
Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
---
 net/mptcp/protocol.c | 64 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 2f747ab730e5..10cc8756877e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -3956,6 +3956,68 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
 	return mask;
 }
 
+/*
+ * Note:
+ *	- It is assumed that the socket was locked by the caller.
+ */
+static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+			   sk_read_actor_t recv_actor)
+{
+	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct scm_timestamping_internal tss;
+	size_t len = sk->sk_rcvbuf;
+	struct sk_buff *skb, *tmp;
+	int copied = 0;
+
+	if (sk->sk_state == TCP_LISTEN)
+		return -ENOTCONN;
+	skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
+		u32 offset = MPTCP_SKB_CB(skb)->offset;
+		u32 data_len = skb->len - offset;
+		u32 size = min_t(size_t, len - copied, data_len);
+		int count;
+
+		count = recv_actor(desc, skb, offset, size);
+		if (count <= 0) {
+			if (!copied)
+				copied = count;
+			break;
+		}
+
+		if (MPTCP_SKB_CB(skb)->has_rxtstamp)
+			tcp_update_recv_tstamps(skb, &tss);
+
+		copied += count;
+
+		if (count < data_len) {
+			MPTCP_SKB_CB(skb)->offset += count;
+			MPTCP_SKB_CB(skb)->map_seq += count;
+			msk->bytes_consumed += count;
+			break;
+		}
+
+		/* avoid the indirect call, we know the destructor is sock_wfree */
+		skb->destructor = NULL;
+		atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
+		sk_mem_uncharge(sk, skb->truesize);
+		sk_eat_skb(sk, skb);
+		msk->bytes_consumed += count;
+
+		if (copied >= len)
+			break;
+	}
+
+	mptcp_rcv_space_adjust(msk, copied);
+
+	if (copied > 0) {
+		if (skb_queue_empty(&sk->sk_receive_queue))
+			__mptcp_move_skbs(sk);
+		mptcp_cleanup_rbuf(msk, copied);
+	}
+
+	return copied;
+}
+
 static const struct proto_ops mptcp_stream_ops = {
 	.family		   = PF_INET,
 	.owner		   = THIS_MODULE,
@@ -3976,6 +4038,7 @@ static const struct proto_ops mptcp_stream_ops = {
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.set_rcvlowat	   = mptcp_set_rcvlowat,
+	.read_sock	   = mptcp_read_sock,
 };
 
 static struct inet_protosw mptcp_protosw = {
@@ -4080,6 +4143,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
 	.compat_ioctl	   = inet6_compat_ioctl,
 #endif
 	.set_rcvlowat	   = mptcp_set_rcvlowat,
+	.read_sock	   = mptcp_read_sock,
 };
 
 static struct proto mptcp_v6_prot;
-- 
2.48.1
Re: [PATCH mptcp-next v4 2/4] mptcp: implement .read_sock
Posted by Paolo Abeni 2 months, 2 weeks ago
On 6/27/25 5:59 PM, Geliang Tang wrote:
> From: Geliang Tang <tanggeliang@kylinos.cn>
> 
> nvme_tcp_try_recv() needs to call .read_sock interface of struct
> proto_ops, but it is not implemented in MPTCP.
> 
> This patch implements it with reference to __mptcp_recvmsg_mskq().
> 
> v2:
>  - first check the sk_state (Matt), but not look for the end of the
> end of a connection like TCP in __tcp_read_sock():
> 
> 	if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
> 		break;
> 
> This will cause a use-after-free error:
> 
> 	BUG: KASAN: slab-use-after-free in mptcp_read_sock.
> 
> v3:
>  - Use sk->sk_rcvbuf instead of INT_MAX as the max len.
> 
> v4:
>  - invoke __mptcp_move_skbs.
> 
> Reviewed-by: Hannes Reinecke <hare@kernel.org>
> Signed-off-by: Geliang Tang <tanggeliang@kylinos.cn>
> ---
>  net/mptcp/protocol.c | 64 ++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 64 insertions(+)
> 
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 2f747ab730e5..10cc8756877e 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -3956,6 +3956,68 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
>  	return mask;
>  }
>  
> +/*
> + * Note:
> + *	- It is assumed that the socket was locked by the caller.
> + */
> +static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc,
> +			   sk_read_actor_t recv_actor)
> +{
> +	struct mptcp_sock *msk = mptcp_sk(sk);
> +	struct scm_timestamping_internal tss;
> +	size_t len = sk->sk_rcvbuf;
> +	struct sk_buff *skb, *tmp;
> +	int copied = 0;
> +
> +	if (sk->sk_state == TCP_LISTEN)
> +		return -ENOTCONN;
> +	skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
> +		u32 offset = MPTCP_SKB_CB(skb)->offset;
> +		u32 data_len = skb->len - offset;
> +		u32 size = min_t(size_t, len - copied, data_len);
> +		int count;
> +
> +		count = recv_actor(desc, skb, offset, size);
> +		if (count <= 0) {
> +			if (!copied)
> +				copied = count;
> +			break;
> +		}
> +
> +		if (MPTCP_SKB_CB(skb)->has_rxtstamp)
> +			tcp_update_recv_tstamps(skb, &tss);

Looking at tcp_read_sock() as the reference, I'm wondering why the above
is needed? likely at least a comment is deserved - or drop the statement.

> +
> +		copied += count;
> +
> +		if (count < data_len) {
> +			MPTCP_SKB_CB(skb)->offset += count;
> +			MPTCP_SKB_CB(skb)->map_seq += count;
> +			msk->bytes_consumed += count;
> +			break;
> +		}
> +
> +		/* avoid the indirect call, we know the destructor is sock_wfree */
> +		skb->destructor = NULL;
> +		atomic_sub(skb->truesize, &sk->sk_rmem_alloc);
> +		sk_mem_uncharge(sk, skb->truesize);
> +		sk_eat_skb(sk, skb);
> +		msk->bytes_consumed += count;
> +
> +		if (copied >= len)
> +			break;
> +	}
> +
> +	mptcp_rcv_space_adjust(msk, copied);
> +
> +	if (copied > 0) {
> +		if (skb_queue_empty(&sk->sk_receive_queue))
> +			__mptcp_move_skbs(sk);

This is needed even in the main loop.

It's probably better to code a mptcp_recv_skb() that will take care of
that (and of offset adjust), and use such helper here and in the main loop.

/P