[PATCH v6 mptcp-next 05/11] mptcp: fix MSG_PEEK stream corruption

Paolo Abeni posted 11 patches 3 months, 2 weeks ago
There is a newer version of this series
[PATCH v6 mptcp-next 05/11] mptcp: fix MSG_PEEK stream corruption
Posted by Paolo Abeni 3 months, 2 weeks ago
If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the
receive queue and recvmsg() need to waits for more data - i.e. it's a
blocking one - upon arrival of the next packet the MPTCP protocol will
start again copying the oldest data present in the receive queue,
corrupting the data stream.

Address the issue explicitly tracking the peeked sequence number,
restarting from the last peeked byte.

Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
This may sound quite esoteric, but it will soon become very easy to
reproduce with mptcp_connect, thanks to the backlog.
---
 net/mptcp/protocol.c | 38 +++++++++++++++++++++++++-------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 51c55a45aeaccd..200b657080eb3e 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1947,22 +1947,36 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
 
-static int __mptcp_recvmsg_mskq(struct sock *sk,
-				struct msghdr *msg,
-				size_t len, int flags,
+static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
+				size_t len, int flags, int copied_total,
 				struct scm_timestamping_internal *tss,
 				int *cmsg_flags)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
 	struct sk_buff *skb, *tmp;
+	int total_data_len = 0;
 	int copied = 0;
 
 	skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
-		u32 offset = MPTCP_SKB_CB(skb)->offset;
+		u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
 		u32 data_len = skb->len - offset;
-		u32 count = min_t(size_t, len - copied, data_len);
+		u32 count;
 		int err;
 
+		if (flags & MSG_PEEK) {
+			/* skip already peeked skbs*/
+			if (total_data_len + data_len <= copied_total) {
+				total_data_len += data_len;
+				continue;
+			}
+
+			/* skip the already peeked data in the current skb */
+			delta = copied_total - total_data_len;
+			offset += delta;
+			data_len -= delta;
+		}
+
+		count = min_t(size_t, len - copied, data_len);
 		if (!(flags & MSG_TRUNC)) {
 			err = skb_copy_datagram_msg(skb, offset, msg, count);
 			if (unlikely(err < 0)) {
@@ -1979,16 +1993,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
 
 		copied += count;
 
-		if (count < data_len) {
-			if (!(flags & MSG_PEEK)) {
+		if (!(flags & MSG_PEEK)) {
+			msk->bytes_consumed += count;
+			if (count < data_len) {
 				MPTCP_SKB_CB(skb)->offset += count;
 				MPTCP_SKB_CB(skb)->map_seq += count;
-				msk->bytes_consumed += count;
+				break;
 			}
-			break;
-		}
 
-		if (!(flags & MSG_PEEK)) {
 			/* avoid the indirect call, we know the destructor is sock_rfree */
 			skb->destructor = NULL;
 			skb->sk = NULL;
@@ -1996,7 +2008,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
 			sk_mem_uncharge(sk, skb->truesize);
 			__skb_unlink(skb, &sk->sk_receive_queue);
 			skb_attempt_defer_free(skb);
-			msk->bytes_consumed += count;
 		}
 
 		if (copied >= len)
@@ -2194,7 +2205,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	while (copied < len) {
 		int err, bytes_read;
 
-		bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
+		bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
+						  copied, &tss, &cmsg_flags);
 		if (unlikely(bytes_read < 0)) {
 			if (!copied)
 				copied = bytes_read;
-- 
2.51.0
Re: [PATCH v6 mptcp-next 05/11] mptcp: fix MSG_PEEK stream corruption
Posted by Mat Martineau 3 months, 2 weeks ago
On Wed, 22 Oct 2025, Paolo Abeni wrote:

> If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the
> receive queue and recvmsg() need to waits for more data - i.e. it's a
> blocking one - upon arrival of the next packet the MPTCP protocol will
> start again copying the oldest data present in the receive queue,
> corrupting the data stream.
>
> Address the issue explicitly tracking the peeked sequence number,
> restarting from the last peeked byte.
>
> Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> This may sound quite esoteric, but it will soon become very easy to
> reproduce with mptcp_connect, thanks to the backlog.

Would it be good to apply this to -net?

- Mat

> ---
> net/mptcp/protocol.c | 38 +++++++++++++++++++++++++-------------
> 1 file changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 51c55a45aeaccd..200b657080eb3e 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1947,22 +1947,36 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>
> static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
>
> -static int __mptcp_recvmsg_mskq(struct sock *sk,
> -				struct msghdr *msg,
> -				size_t len, int flags,
> +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
> +				size_t len, int flags, int copied_total,
> 				struct scm_timestamping_internal *tss,
> 				int *cmsg_flags)
> {
> 	struct mptcp_sock *msk = mptcp_sk(sk);
> 	struct sk_buff *skb, *tmp;
> +	int total_data_len = 0;
> 	int copied = 0;
>
> 	skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
> -		u32 offset = MPTCP_SKB_CB(skb)->offset;
> +		u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
> 		u32 data_len = skb->len - offset;
> -		u32 count = min_t(size_t, len - copied, data_len);
> +		u32 count;
> 		int err;
>
> +		if (flags & MSG_PEEK) {
> +			/* skip already peeked skbs*/
> +			if (total_data_len + data_len <= copied_total) {
> +				total_data_len += data_len;
> +				continue;
> +			}
> +
> +			/* skip the already peeked data in the current skb */
> +			delta = copied_total - total_data_len;
> +			offset += delta;
> +			data_len -= delta;
> +		}
> +
> +		count = min_t(size_t, len - copied, data_len);
> 		if (!(flags & MSG_TRUNC)) {
> 			err = skb_copy_datagram_msg(skb, offset, msg, count);
> 			if (unlikely(err < 0)) {
> @@ -1979,16 +1993,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
>
> 		copied += count;
>
> -		if (count < data_len) {
> -			if (!(flags & MSG_PEEK)) {
> +		if (!(flags & MSG_PEEK)) {
> +			msk->bytes_consumed += count;
> +			if (count < data_len) {
> 				MPTCP_SKB_CB(skb)->offset += count;
> 				MPTCP_SKB_CB(skb)->map_seq += count;
> -				msk->bytes_consumed += count;
> +				break;
> 			}
> -			break;
> -		}
>
> -		if (!(flags & MSG_PEEK)) {
> 			/* avoid the indirect call, we know the destructor is sock_rfree */
> 			skb->destructor = NULL;
> 			skb->sk = NULL;
> @@ -1996,7 +2008,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
> 			sk_mem_uncharge(sk, skb->truesize);
> 			__skb_unlink(skb, &sk->sk_receive_queue);
> 			skb_attempt_defer_free(skb);
> -			msk->bytes_consumed += count;
> 		}
>
> 		if (copied >= len)
> @@ -2194,7 +2205,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> 	while (copied < len) {
> 		int err, bytes_read;
>
> -		bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
> +		bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
> +						  copied, &tss, &cmsg_flags);
> 		if (unlikely(bytes_read < 0)) {
> 			if (!copied)
> 				copied = bytes_read;
> -- 
> 2.51.0
>
>
>
Re: [PATCH v6 mptcp-next 05/11] mptcp: fix MSG_PEEK stream corruption
Posted by Paolo Abeni 3 months, 2 weeks ago
On 10/23/25 6:56 PM, Mat Martineau wrote:
> On Wed, 22 Oct 2025, Paolo Abeni wrote:
> 
>> If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the
>> receive queue and recvmsg() need to waits for more data - i.e. it's a
>> blocking one - upon arrival of the next packet the MPTCP protocol will
>> start again copying the oldest data present in the receive queue,
>> corrupting the data stream.
>>
>> Address the issue explicitly tracking the peeked sequence number,
>> restarting from the last peeked byte.
>>
>> Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support")
>> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
>> ---
>> This may sound quite esoteric, but it will soon become very easy to
>> reproduce with mptcp_connect, thanks to the backlog.
> 
> Would it be good to apply this to -net?

FWIW, I'm fine with applying this to -net.

Thanks,

Paolo