net/unix/af_unix.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-)
unix_stream_data_wait() does skb_peek_tail(&sk->sk_receive_queue) without
holding any lock that prevents SKBs on that queue from being dequeued and
freed.
This has been the case since commit 79f632c71bea ("unix/stream: fix
peeking with an offset larger than data in queue").
The first consequence of this is that the pointer comparison
`tail != last` can be false even if `last` semantically refers to an
already-freed SKB while `tail` is a new SKB allocated at the same address;
which can cause unix_stream_data_wait() to wrongly keep blocking after new
data has arrived, but only in a weird scenario where a peeking recv() and
a normal recv() on the same socket are racing, which is probably not a
real problem.
But since commit 2b514574f7e8 ("net: af_unix: implement splice for stream
af_unix sockets"), `tail` is actually dereferenced, which can cause UAF in
the following race scenario (where test_setup() runs single-threaded,
and afterwards, test_thread1() and test_thread2() run concurrently in
two threads:
```
static int socks[2];
void test_setup(void) {
socketpair(AF_UNIX, SOCK_STREAM, 0, socks);
send(socks[1], "A", 1, 0);
int peekoff = 1;
setsockopt(socks[0], SOL_SOCKET, SO_PEEK_OFF, &peekoff, sizeof(peekoff));
}
void test_thread1(void) {
char dummy;
recv(socks[0], &dummy, 1, MSG_PEEK);
}
void test_thread2(void) {
char dummy;
recv(socks[0], &dummy, 1, 0);
shutdown(socks[1], SHUT_WR);
}
```
when racing like this:
```
thread1 thread2
unix_stream_read_generic
mutex_lock(&u->iolock)
skb_peek(&sk->sk_receive_queue)
skb_peek_next(skb, &sk->sk_receive_queue)
mutex_unlock(&u->iolock)
unix_stream_read_generic
unix_state_lock(sk)
skb_peek(&sk->sk_receive_queue)
unix_state_unlock(sk)
unix_stream_data_wait
unix_state_lock(sk)
tail = skb_peek_tail(&sk->sk_receive_queue)
spin_lock(&sk->sk_receive_queue.lock)
__skb_unlink(skb, &sk->sk_receive_queue)
spin_unlock(&sk->sk_receive_queue.lock)
consume_skb(skb) [frees the SKB]
`tail != last`: false
`tail`: true
`tail->len != last_len` ***UAF***
```
Fix the UAF by removing the read of tail->len; checking tail->len would
only make sense if SKBs in the receive queue of a UNIX socket could grow,
which can no longer happen.
Kuniyuki explained:
> When commit 869e7c62486e ("net: af_unix: implement stream sendpage
> support") added sendpage() support, data could be appended to the last
> skb in the receiver's queue.
>
> That's why we needed to check if the length of the last skb was changed
> while waiting for new data in unix_stream_data_wait().
>
> However, commit a0dbf5f818f9 ("af_unix: Support MSG_SPLICE_PAGES") and
> commit 57d44a354a43 ("unix: Convert unix_stream_sendpage() to use
> MSG_SPLICE_PAGES") refactored sendmsg(), and now data is always added
> to a new skb.
That means this fix is not suitable for kernels before 6.5.
Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets")
Cc: stable@vger.kernel.org # 6.5.x
Signed-off-by: Jann Horn <jannh@google.com>
---
sending as a separate patch as requested at
<https://lore.kernel.org/all/CAAVpQUDJa0=h+iFqr6ZEJ72b5nYTX3Ay-Vbkk0-7Y-KZB_3SBg@mail.gmail.com/>.
Based on the context provided there, I have changed the CC: stable line
to mark the patch as only being suitable for kernel 6.5+. If we want to
fix older kernels, I guess we'll need something different for those...
---
Changes in v2:
- limit stable backport range to 6.5+
- Link to v1: https://lore.kernel.org/r/20260515-unix-recv-wait-v1-0-76adb5f063d5@google.com
---
net/unix/af_unix.c | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1cbf36ea043b..dc71ed79be4a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2711,8 +2711,7 @@ static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
* Sleep until more data has arrived. But check for races..
*/
static long unix_stream_data_wait(struct sock *sk, long timeo,
- struct sk_buff *last, unsigned int last_len,
- bool freezable)
+ struct sk_buff *last, bool freezable)
{
unsigned int state = TASK_INTERRUPTIBLE | freezable * TASK_FREEZABLE;
struct sk_buff *tail;
@@ -2725,7 +2724,6 @@ static long unix_stream_data_wait(struct sock *sk, long timeo,
tail = skb_peek_tail(&sk->sk_receive_queue);
if (tail != last ||
- (tail && tail->len != last_len) ||
sk->sk_err ||
(sk->sk_shutdown & RCV_SHUTDOWN) ||
signal_pending(current) ||
@@ -2921,7 +2919,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
int flags = state->flags;
bool check_creds = false;
struct scm_cookie scm;
- unsigned int last_len;
struct unix_sock *u;
int copied = 0;
int err = 0;
@@ -2967,7 +2964,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
goto unlock;
}
last = skb = skb_peek(&sk->sk_receive_queue);
- last_len = last ? last->len : 0;
again:
#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
@@ -3001,8 +2997,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
mutex_unlock(&u->iolock);
- timeo = unix_stream_data_wait(sk, timeo, last,
- last_len, freezable);
+ timeo = unix_stream_data_wait(sk, timeo, last, freezable);
if (signal_pending(current)) {
err = sock_intr_errno(timeo);
@@ -3019,7 +3014,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
while (skip >= unix_skb_len(skb)) {
skip -= unix_skb_len(skb);
last = skb;
- last_len = skb->len;
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (!skb)
goto again;
@@ -3094,7 +3088,6 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
skip = 0;
last = skb;
- last_len = skb->len;
unix_state_lock(sk);
skb = skb_peek_next(skb, &sk->sk_receive_queue);
if (skb)
---
base-commit: aaec7096f9961eb223b5b149abe9495525c205d9
change-id: 20260518-b4-unix-recv-wait-hotfix-e91400b41251
--
Jann Horn <jannh@google.com>
On Mon, May 18, 2026 at 9:51 AM Jann Horn <jannh@google.com> wrote:
>
> unix_stream_data_wait() does skb_peek_tail(&sk->sk_receive_queue) without
> holding any lock that prevents SKBs on that queue from being dequeued and
> freed.
> This has been the case since commit 79f632c71bea ("unix/stream: fix
> peeking with an offset larger than data in queue").
> The first consequence of this is that the pointer comparison
> `tail != last` can be false even if `last` semantically refers to an
> already-freed SKB while `tail` is a new SKB allocated at the same address;
> which can cause unix_stream_data_wait() to wrongly keep blocking after new
> data has arrived, but only in a weird scenario where a peeking recv() and
> a normal recv() on the same socket are racing, which is probably not a
> real problem.
>
> But since commit 2b514574f7e8 ("net: af_unix: implement splice for stream
> af_unix sockets"), `tail` is actually dereferenced, which can cause UAF in
> the following race scenario (where test_setup() runs single-threaded,
> and afterwards, test_thread1() and test_thread2() run concurrently in
> two threads:
> ```
> static int socks[2];
> void test_setup(void) {
> socketpair(AF_UNIX, SOCK_STREAM, 0, socks);
> send(socks[1], "A", 1, 0);
> int peekoff = 1;
> setsockopt(socks[0], SOL_SOCKET, SO_PEEK_OFF, &peekoff, sizeof(peekoff));
> }
> void test_thread1(void) {
> char dummy;
> recv(socks[0], &dummy, 1, MSG_PEEK);
> }
> void test_thread2(void) {
> char dummy;
> recv(socks[0], &dummy, 1, 0);
> shutdown(socks[1], SHUT_WR);
> }
> ```
>
> when racing like this:
> ```
> thread1 thread2
> unix_stream_read_generic
> mutex_lock(&u->iolock)
> skb_peek(&sk->sk_receive_queue)
> skb_peek_next(skb, &sk->sk_receive_queue)
> mutex_unlock(&u->iolock)
> unix_stream_read_generic
> unix_state_lock(sk)
> skb_peek(&sk->sk_receive_queue)
> unix_state_unlock(sk)
> unix_stream_data_wait
> unix_state_lock(sk)
> tail = skb_peek_tail(&sk->sk_receive_queue)
> spin_lock(&sk->sk_receive_queue.lock)
> __skb_unlink(skb, &sk->sk_receive_queue)
> spin_unlock(&sk->sk_receive_queue.lock)
> consume_skb(skb) [frees the SKB]
> `tail != last`: false
> `tail`: true
> `tail->len != last_len` ***UAF***
> ```
>
> Fix the UAF by removing the read of tail->len; checking tail->len would
> only make sense if SKBs in the receive queue of a UNIX socket could grow,
> which can no longer happen.
>
> Kuniyuki explained:
>
> > When commit 869e7c62486e ("net: af_unix: implement stream sendpage
> > support") added sendpage() support, data could be appended to the last
> > skb in the receiver's queue.
> >
> > That's why we needed to check if the length of the last skb was changed
> > while waiting for new data in unix_stream_data_wait().
> >
> > However, commit a0dbf5f818f9 ("af_unix: Support MSG_SPLICE_PAGES") and
> > commit 57d44a354a43 ("unix: Convert unix_stream_sendpage() to use
> > MSG_SPLICE_PAGES") refactored sendmsg(), and now data is always added
> > to a new skb.
>
> That means this fix is not suitable for kernels before 6.5.
>
> Fixes: 2b514574f7e8 ("net: af_unix: implement splice for stream af_unix sockets")
> Cc: stable@vger.kernel.org # 6.5.x
> Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Thanks !
© 2016 - 2026 Red Hat, Inc.