On Wed, 22 Oct 2025, Paolo Abeni wrote:
> If a MSG_PEEK | MSG_WAITALL read operation consumes all the bytes in the
> receive queue and recvmsg() need to waits for more data - i.e. it's a
> blocking one - upon arrival of the next packet the MPTCP protocol will
> start again copying the oldest data present in the receive queue,
> corrupting the data stream.
>
> Address the issue explicitly tracking the peeked sequence number,
> restarting from the last peeked byte.
>
> Fixes: ca4fb892579f ("mptcp: add MSG_PEEK support")
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> ---
> This may sound quite esoteric, but it will soon become very easy to
> reproduce with mptcp_connect, thanks to the backlog.
Would it be good to apply this to -net?
- Mat
> ---
> net/mptcp/protocol.c | 38 +++++++++++++++++++++++++-------------
> 1 file changed, 25 insertions(+), 13 deletions(-)
>
> diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
> index 51c55a45aeaccd..200b657080eb3e 100644
> --- a/net/mptcp/protocol.c
> +++ b/net/mptcp/protocol.c
> @@ -1947,22 +1947,36 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
>
> static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied);
>
> -static int __mptcp_recvmsg_mskq(struct sock *sk,
> - struct msghdr *msg,
> - size_t len, int flags,
> +static int __mptcp_recvmsg_mskq(struct sock *sk, struct msghdr *msg,
> + size_t len, int flags, int copied_total,
> struct scm_timestamping_internal *tss,
> int *cmsg_flags)
> {
> struct mptcp_sock *msk = mptcp_sk(sk);
> struct sk_buff *skb, *tmp;
> + int total_data_len = 0;
> int copied = 0;
>
> skb_queue_walk_safe(&sk->sk_receive_queue, skb, tmp) {
> - u32 offset = MPTCP_SKB_CB(skb)->offset;
> + u32 delta, offset = MPTCP_SKB_CB(skb)->offset;
> u32 data_len = skb->len - offset;
> - u32 count = min_t(size_t, len - copied, data_len);
> + u32 count;
> int err;
>
> + if (flags & MSG_PEEK) {
> + /* skip already peeked skbs*/
> + if (total_data_len + data_len <= copied_total) {
> + total_data_len += data_len;
> + continue;
> + }
> +
> + /* skip the already peeked data in the current skb */
> + delta = copied_total - total_data_len;
> + offset += delta;
> + data_len -= delta;
> + }
> +
> + count = min_t(size_t, len - copied, data_len);
> if (!(flags & MSG_TRUNC)) {
> err = skb_copy_datagram_msg(skb, offset, msg, count);
> if (unlikely(err < 0)) {
> @@ -1979,16 +1993,14 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
>
> copied += count;
>
> - if (count < data_len) {
> - if (!(flags & MSG_PEEK)) {
> + if (!(flags & MSG_PEEK)) {
> + msk->bytes_consumed += count;
> + if (count < data_len) {
> MPTCP_SKB_CB(skb)->offset += count;
> MPTCP_SKB_CB(skb)->map_seq += count;
> - msk->bytes_consumed += count;
> + break;
> }
> - break;
> - }
>
> - if (!(flags & MSG_PEEK)) {
> /* avoid the indirect call, we know the destructor is sock_rfree */
> skb->destructor = NULL;
> skb->sk = NULL;
> @@ -1996,7 +2008,6 @@ static int __mptcp_recvmsg_mskq(struct sock *sk,
> sk_mem_uncharge(sk, skb->truesize);
> __skb_unlink(skb, &sk->sk_receive_queue);
> skb_attempt_defer_free(skb);
> - msk->bytes_consumed += count;
> }
>
> if (copied >= len)
> @@ -2194,7 +2205,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> while (copied < len) {
> int err, bytes_read;
>
> - bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags, &tss, &cmsg_flags);
> + bytes_read = __mptcp_recvmsg_mskq(sk, msg, len - copied, flags,
> + copied, &tss, &cmsg_flags);
> if (unlikely(bytes_read < 0)) {
> if (!copied)
> copied = bytes_read;
> --
> 2.51.0
>
>
>