include/net/kcm.h | 1 - net/kcm/kcmsock.c | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-)
syzbot found a race condition when kcm_unattach(psock)
and kcm_release(kcm) are executed at the same time.
kcm_unattach is missing a check of the flag
kcm->tx_stopped before calling queue_work().
If the kcm has a reserved psock, kcm_unattach() might get executed
between cancel_work_sync() and unreserve_psock() in kcm_release(),
requeuing kcm->tx_work right before kcm gets freed in kcm_done().
Remove kcm->tx_stopped and replace it by the less
error-prone disable_work().
Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module")
Reported-by: syzbot+e62c9db591c30e174662@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=e62c9db591c30e174662
Reported-by: syzbot+d199b52665b6c3069b94@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=d199b52665b6c3069b94
Signed-off-by: Sven Stegemann <sven@stegemann.de>
---
include/net/kcm.h | 1 -
net/kcm/kcmsock.c | 9 ++-------
2 files changed, 2 insertions(+), 8 deletions(-)
diff --git a/include/net/kcm.h b/include/net/kcm.h
index 441e993be634..d9c35e71ecea 100644
--- a/include/net/kcm.h
+++ b/include/net/kcm.h
@@ -71,7 +71,6 @@ struct kcm_sock {
struct list_head wait_psock_list;
struct sk_buff *seq_skb;
struct mutex tx_mutex;
- u32 tx_stopped : 1;
/* Don't use bit fields here, these are set under different locks */
bool tx_wait;
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index a4971e6fa943..2f66b5279f2a 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -430,7 +430,7 @@ static void psock_write_space(struct sock *sk)
/* Check if the socket is reserved so someone is waiting for sending. */
kcm = psock->tx_kcm;
- if (kcm && !unlikely(kcm->tx_stopped))
+ if (kcm)
queue_work(kcm_wq, &kcm->tx_work);
spin_unlock_bh(&mux->lock);
@@ -1693,12 +1693,6 @@ static int kcm_release(struct socket *sock)
*/
__skb_queue_purge(&sk->sk_write_queue);
- /* Set tx_stopped. This is checked when psock is bound to a kcm and we
- * get a writespace callback. This prevents further work being queued
- * from the callback (unbinding the psock occurs after canceling work.
- */
- kcm->tx_stopped = 1;
-
release_sock(sk);
spin_lock_bh(&mux->lock);
@@ -1714,6 +1708,7 @@ static int kcm_release(struct socket *sock)
/* Cancel work. After this point there should be no outside references
* to the kcm socket.
*/
+ disable_work(&kcm->tx_work);
cancel_work_sync(&kcm->tx_work);
lock_sock(sk);
--
2.50.1
On 8/9/25 8:36 AM, Sven Stegemann wrote: > @@ -1714,6 +1708,7 @@ static int kcm_release(struct socket *sock) > /* Cancel work. After this point there should be no outside references > * to the kcm socket. > */ > + disable_work(&kcm->tx_work); > cancel_work_sync(&kcm->tx_work); The patch looks functionally correct, but I guess it would be cleaner simply replacing: cancel_work_sync(&kcm->tx_work); with: disable_work_sync(&kcm->tx_work); Thanks, Paolo
On 8/12/2025 2:54 PM, Paolo Abeni wrote: > On 8/9/25 8:36 AM, Sven Stegemann wrote: >> @@ -1714,6 +1708,7 @@ static int kcm_release(struct socket *sock) >> /* Cancel work. After this point there should be no outside references >> * to the kcm socket. >> */ >> + disable_work(&kcm->tx_work); >> cancel_work_sync(&kcm->tx_work); > > The patch looks functionally correct, but I guess it would be cleaner > simply replacing: > > cancel_work_sync(&kcm->tx_work); > > with: > > disable_work_sync(&kcm->tx_work); Thank you, that's a good point. I just submitted a cleaned up version of the patch.
#syz test upstream master syzbot found a race condition when kcm_unattach(psock) and kcm_release(kcm) are executed at the same time. kcm_unattach is missing a check of the flag kcm->tx_stopped before calling queue_work(). If the kcm has a reserved psock, kcm_unattach() might get executed between cancel_work_sync() and unreserve_psock() in kcm_release(), requeuing kcm->tx_work right before kcm gets freed in kcm_done(). Remove kcm->tx_stopped and replace it by the less error-prone disable_work(). Fixes: ab7ac4eb9832 ("kcm: Kernel Connection Multiplexor module") Reported-by: syzbot+e62c9db591c30e174662@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=e62c9db591c30e174662 Reported-by: syzbot+d199b52665b6c3069b94@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=d199b52665b6c3069b94 Signed-off-by: Sven Stegemann <sven@stegemann.de> --- include/net/kcm.h | 1 - net/kcm/kcmsock.c | 9 ++------- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/include/net/kcm.h b/include/net/kcm.h index 441e993be634..d9c35e71ecea 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -71,7 +71,6 @@ struct kcm_sock { struct list_head wait_psock_list; struct sk_buff *seq_skb; struct mutex tx_mutex; - u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ bool tx_wait; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a4971e6fa943..2f66b5279f2a 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -430,7 +430,7 @@ static void psock_write_space(struct sock *sk) /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm && !unlikely(kcm->tx_stopped)) + if (kcm) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1693,12 +1693,6 @@ static int kcm_release(struct socket *sock) */ __skb_queue_purge(&sk->sk_write_queue); - /* Set tx_stopped. This is checked when psock is bound to a kcm and we - * get a writespace callback. This prevents further work being queued - * from the callback (unbinding the psock occurs after canceling work. - */ - kcm->tx_stopped = 1; - release_sock(sk); spin_lock_bh(&mux->lock); @@ -1714,6 +1708,7 @@ static int kcm_release(struct socket *sock) /* Cancel work. After this point there should be no outside references * to the kcm socket. */ + disable_work(&kcm->tx_work); cancel_work_sync(&kcm->tx_work); lock_sock(sk); -- 2.50.1
Hello, syzbot has tested the proposed patch and the reproducer did not trigger any issue: Reported-by: syzbot+d199b52665b6c3069b94@syzkaller.appspotmail.com Tested-by: syzbot+d199b52665b6c3069b94@syzkaller.appspotmail.com Tested on: commit: c30a1353 Merge tag 'bpf-fixes' of git://git.kernel.org.. git tree: upstream console output: https://syzkaller.appspot.com/x/log.txt?x=10927ea2580000 kernel config: https://syzkaller.appspot.com/x/.config?x=2ae1da3a7f4a6ba4 dashboard link: https://syzkaller.appspot.com/bug?extid=d199b52665b6c3069b94 compiler: Debian clang version 20.1.7 (++20250616065708+6146a88f6049-1~exp1~20250616065826.132), Debian LLD 20.1.7 patch: https://syzkaller.appspot.com/x/patch.diff?x=134a5434580000 Note: testing is done by a robot and is best-effort only.
On 8/9/2025 8:36 AM, Sven Stegemann wrote: > syzbot found a race condition when kcm_unattach(psock) > and kcm_release(kcm) are executed at the same time. > > kcm_unattach is missing a check of the flag > kcm->tx_stopped before calling queue_work(). > > If the kcm has a reserved psock, kcm_unattach() might get executed > between cancel_work_sync() and unreserve_psock() in kcm_release(), > requeuing kcm->tx_work right before kcm gets freed in kcm_done(). > > Remove kcm->tx_stopped and replace it by the less > error-prone disable_work(). I made a mistake in the subject line. It is supposed to say "[PATCH net]" instead of "[PATCH net-next". Also some information about the testing I have done: I patched msleep() calls into the race windows and wrote a reproducer in C that reliably triggers a KASAN use-after-free read at the beginning of kcm_tx_work if run against that kernel build. With the proposed patch the reproducer does not trigger any crashes or warnings. The syscalls also return non-negative values. These are the files I used for debugging: - Kernel patch: diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a4971e6fa943..df61f4715747 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -446,6 +446,8 @@ static struct kcm_psock *reserve_psock(struct kcm_sock *kcm) struct kcm_mux *mux = kcm->mux; struct kcm_psock *psock; + printk("reserve_psock: call function"); + psock = kcm->tx_psock; smp_rmb(); /* Must read tx_psock before tx_wait */ @@ -527,6 +529,8 @@ static void unreserve_psock(struct kcm_sock *kcm) struct kcm_psock *psock; struct kcm_mux *mux = kcm->mux; + printk("unreserve_psock: call function"); + spin_lock_bh(&mux->lock); psock = kcm->tx_psock; @@ -715,6 +719,10 @@ static void kcm_tx_work(struct work_struct *w) struct sock *sk = &kcm->sk; int err; + printk("kcm_tx_work: entered function"); + + msleep(200); + lock_sock(sk); /* Primarily for SOCK_DGRAM sockets, also handle asynchronous tx @@ -737,6 +745,9 @@ static void kcm_tx_work(struct work_struct *w) out: release_sock(sk); + + printk("kcm_tx_work: exiting function"); + } static void kcm_push(struct kcm_sock *kcm) @@ -1357,6 +1368,8 @@ static void kcm_unattach(struct kcm_psock *psock) struct sock *csk = psock->sk; struct kcm_mux *mux = psock->mux; + printk("kcm_unattach: entered function"); + lock_sock(csk); /* Stop getting callbacks from TCP socket. After this there should @@ -1419,6 +1432,9 @@ static void kcm_unattach(struct kcm_psock *psock) */ kcm_abort_tx_psock(psock, EPIPE, false); + printk("kcm_unattach: sleeping before queue_work"); + msleep(100); + spin_lock_bh(&mux->lock); if (!psock->tx_kcm) { /* psock now unreserved in window mux was unlocked */ @@ -1429,6 +1445,8 @@ static void kcm_unattach(struct kcm_psock *psock) /* Commit done before queuing work to process it */ smp_mb(); + printk("kcm_unattach: queueing tx_work"); + /* Queue tx work to make sure psock->done is handled */ queue_work(kcm_wq, &psock->tx_kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1446,6 +1464,8 @@ static void kcm_unattach(struct kcm_psock *psock) } release_sock(csk); + + printk("kcm_unattach: exiting function"); } static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) @@ -1677,6 +1697,8 @@ static int kcm_release(struct socket *sock) struct kcm_mux *mux; struct kcm_psock *psock; + printk("kcm_release: entered function"); + if (!sk) return 0; @@ -1716,6 +1738,9 @@ static int kcm_release(struct socket *sock) */ cancel_work_sync(&kcm->tx_work); + printk("kcm_release: sleeping after cancel_work_sync"); + msleep(150); + lock_sock(sk); psock = kcm->tx_psock; if (psock) { @@ -1733,8 +1758,12 @@ static int kcm_release(struct socket *sock) sock->sk = NULL; + printk("kcm_release: freeing kcm"); + kcm_done(kcm); + printk("kcm_release: exiting function"); + return 0; } -- - Reproducer: #include <arpa/inet.h> #include <linux/bpf.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/kcm.h> #include <linux/bpf_common.h> #include <sys/ioctl.h> #include <sys/mman.h> #include <sys/socket.h> #include <sys/syscall.h> #include <sys/wait.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <unistd.h> int check_error(int ret, const char *err_message) { if(ret < 0) { perror(err_message); exit(ret); } return ret; } int main() { // system("busybox ip l set lo up"); union bpf_attr prog = { .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, .insn_cnt = 2, .insns = (uint64_t)(struct bpf_insn[]){ {.code=BPF_ALU64|BPF_MOV|BPF_K, .dst_reg=BPF_REG_0, .imm=0}, {.code=BPF_JMP|BPF_EXIT}, }, .license = (__u64) "", }; int tcp_fd, listen_fd, accept_fd, bpf_fd, kcm_fd, mux_fd; struct sockaddr_in addr = { .sin_family = AF_INET, .sin_port = htons(3270), .sin_addr.s_addr = inet_addr("127.0.0.1") }; check_error( listen_fd = socket(AF_INET, SOCK_STREAM, 0), "socket(tcp)" ); check_error( bind(listen_fd, (void *)&addr, sizeof(addr)), "bind" ); check_error( listen(listen_fd, 1), "listen" ); check_error( tcp_fd = socket(AF_INET, SOCK_STREAM, 0), "socket(tcp)" ); check_error( connect(tcp_fd, (void *)&addr, sizeof(addr)), "connect" ); check_error( bpf_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &prog, 48), "bpf" ); check_error( mux_fd = socket(AF_KCM, SOCK_SEQPACKET, 0), "socket(mux)" ); check_error( ioctl(mux_fd, SIOCKCMCLONE, &kcm_fd), "clone" ); struct kcm_attach attach = {tcp_fd, bpf_fd}; check_error( ioctl(mux_fd, SIOCKCMATTACH, &attach), "attach" ); size_t msg_len = (1<<24); struct iovec iov = { .iov_base = mmap(NULL, msg_len, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0), .iov_len = msg_len, }; struct msghdr msg = { .msg_name = "R", .msg_namelen = 1, .msg_iov = &iov, .msg_iovlen = 1, 0 }; check_error( sendmsg(kcm_fd, &msg, MSG_EOR), "sendmsg" ); printf("Wait 30s for worker threads to finish\n"); sleep(30); if (fork() == 0) { sleep(1); printf("Calling close from child\n"); check_error( close(kcm_fd), "close(kcm) (child)" ); printf("Child done\n"); } else { check_error( close(kcm_fd), "close(kcm) (parent)" ); sleep(1); printf("Calling unattach from parent\n"); struct kcm_unattach unattach = {tcp_fd}; check_error( ioctl(mux_fd, SIOCKCMUNATTACH, &unattach), "unattach" ); printf("Parent done\n"); int wstatus; wait(&wstatus); } }
© 2016 - 2025 Red Hat, Inc.