include/linux/mempolicy.h | 1 + mm/mempolicy.c | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-)
During futex_key_to_node_opt() execution, vma->vm_policy is read under
speculative mmap lock and RCU. Concurrently, mbind() may call
vma_replace_policy() which frees the old mempolicy immediately via
kmem_cache_free().
This creates a race where __futex_key_to_node() dereferences a freed
mempolicy pointer, causing a use-after-free read of mpol->mode.
[ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
[ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
[ 151.414476]
[ 151.415431] CPU: 1 UID: 1000 PID: 87 Comm: e Not tainted 7.0.0-rc3-g0257f64bdac7 #1 PREEMPT(lazy)
[ 151.415758] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
[ 151.415969] Call Trace:
[ 151.416059] <TASK>
[ 151.416161] dump_stack_lvl (lib/dump_stack.c:123)
[ 151.416299] print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
[ 151.416359] ? __virt_addr_valid (./include/linux/mmzone.h:2046 ./include/linux/mmzone.h:2198 arch/x86/mm/physaddr.c:54)
[ 151.416412] ? __futex_key_to_node (kernel/futex/core.c:349)
[ 151.416517] ? kasan_complete_mode_report_info (mm/kasan/report_generic.c:182)
[ 151.416583] ? __futex_key_to_node (kernel/futex/core.c:349)
[ 151.416631] kasan_report (mm/kasan/report.c:597)
[ 151.416677] ? __futex_key_to_node (kernel/futex/core.c:349)
[ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
[ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
[ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
[ 151.416871] ? __pfx_get_futex_key (kernel/futex/core.c:550)
[ 151.416927] futex_wake (kernel/futex/waitwake.c:165)
[ 151.416976] ? __pfx_futex_wake (kernel/futex/waitwake.c:156)
[ 151.417022] ? __pfx___x64_sys_futex_wait (kernel/futex/syscalls.c:398)
[ 151.417081] __x64_sys_futex_wake (kernel/futex/syscalls.c:382 kernel/futex/syscalls.c:366 kernel/futex/syscalls.c:366)
[ 151.417129] x64_sys_call (arch/x86/entry/syscall_64.c:41)
[ 151.417236] do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
[ 151.417342] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
[ 151.418312] </TASK>
Fix by adding rcu to __mpol_put().
change-log:
v2-v1: add rcu to __mpol_put
Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
---
include/linux/mempolicy.h | 1 +
mm/mempolicy.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0fe96f3ab3ef..65c732d440d2 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -55,6 +55,7 @@ struct mempolicy {
nodemask_t cpuset_mems_allowed; /* relative to these nodes */
nodemask_t user_nodemask; /* nodemask passed by user */
} w;
+ struct rcu_head rcu;
};
/*
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0e5175f1c767..6dc61a3d4a32 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -487,7 +487,7 @@ void __mpol_put(struct mempolicy *pol)
{
if (!atomic_dec_and_test(&pol->refcnt))
return;
- kmem_cache_free(policy_cache, pol);
+ kfree_rcu(pol, rcu);
}
EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
--
2.34.1
Hao-Yu!
On Fri, Mar 13 2026 at 20:47, Hao-Yu Yang wrote:
I've removed the security list as this is public already.
Also added the mm list and the maintainers. While it fixes the futex
problem it is a change to the MM subsystem, so those people need to be
involved.
> During futex_key_to_node_opt() execution, vma->vm_policy is read under
> speculative mmap lock and RCU. Concurrently, mbind() may call
> vma_replace_policy() which frees the old mempolicy immediately via
> kmem_cache_free().
>
> This creates a race where __futex_key_to_node() dereferences a freed
> mempolicy pointer, causing a use-after-free read of mpol->mode.
> [ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
> [ 151.414476]
> [ 151.415431] CPU: 1 UID: 1000 PID: 87 Comm: e Not tainted 7.0.0-rc3-g0257f64bdac7 #1 PREEMPT(lazy)
> [ 151.415758] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
> [ 151.415969] Call Trace:
> [ 151.416059] <TASK>
> [ 151.416161] dump_stack_lvl (lib/dump_stack.c:123)
> [ 151.416299] print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
> [ 151.416359] ? __virt_addr_valid (./include/linux/mmzone.h:2046 ./include/linux/mmzone.h:2198 arch/x86/mm/physaddr.c:54)
> [ 151.416412] ? __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416517] ? kasan_complete_mode_report_info (mm/kasan/report_generic.c:182)
> [ 151.416583] ? __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416631] kasan_report (mm/kasan/report.c:597)
> [ 151.416677] ? __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
> [ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
> [ 151.416871] ? __pfx_get_futex_key (kernel/futex/core.c:550)
> [ 151.416927] futex_wake (kernel/futex/waitwake.c:165)
> [ 151.416976] ? __pfx_futex_wake (kernel/futex/waitwake.c:156)
> [ 151.417022] ? __pfx___x64_sys_futex_wait (kernel/futex/syscalls.c:398)
> [ 151.417081] __x64_sys_futex_wake (kernel/futex/syscalls.c:382 kernel/futex/syscalls.c:366 kernel/futex/syscalls.c:366)
> [ 151.417129] x64_sys_call (arch/x86/entry/syscall_64.c:41)
> [ 151.417236] do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
> [ 151.417342] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
> [ 151.418312] </TASK>
Please trim the backtrace so it only contains the real important
information.
https://docs.kernel.org/process/submitting-patches.html#backtraces-in-commit-messages
> Fix by adding rcu to __mpol_put().
>
> change-log:
> v2-v1: add rcu to __mpol_put
The change history is not part of the change log, it wants to be placed
after the --- separator.
> Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
> Reported-by: Hao-Yu Yang <naup96721@gmail.com>
> Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
This should have a
Suggested-by: Eric Dumazet <edumazet@google.com>
tag.
> ---
> include/linux/mempolicy.h | 1 +
> mm/mempolicy.c | 2 +-
> 2 files changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
> index 0fe96f3ab3ef..65c732d440d2 100644
> --- a/include/linux/mempolicy.h
> +++ b/include/linux/mempolicy.h
> @@ -55,6 +55,7 @@ struct mempolicy {
> nodemask_t cpuset_mems_allowed; /* relative to these nodes */
> nodemask_t user_nodemask; /* nodemask passed by user */
> } w;
> + struct rcu_head rcu;
> };
>
> /*
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 0e5175f1c767..6dc61a3d4a32 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -487,7 +487,7 @@ void __mpol_put(struct mempolicy *pol)
> {
> if (!atomic_dec_and_test(&pol->refcnt))
> return;
> - kmem_cache_free(policy_cache, pol);
> + kfree_rcu(pol, rcu);
> }
> EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
While this looks functionally correct it is incomplete in terms of RCU.
The vma->vm_policy pointer needs to be marked __rcu. That then requires
to use rcu_dereference_check() at the reader side and
rcu_assign_pointer() and rcu_replace_pointer() on the writer side.
Especially the writer side is required so that the proper memory
barriers are inserted for architectures with a weakly ordered memory
model.
Thanks,
tglx
On Mon, Mar 23, 2026 at 06:24:42PM +0100, Thomas Gleixner wrote:
> > include/linux/mempolicy.h | 1 +
> > mm/mempolicy.c | 2 +-
> > 2 files changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
> > index 0fe96f3ab3ef..65c732d440d2 100644
> > --- a/include/linux/mempolicy.h
> > +++ b/include/linux/mempolicy.h
> > @@ -55,6 +55,7 @@ struct mempolicy {
> > nodemask_t cpuset_mems_allowed; /* relative to these nodes */
> > nodemask_t user_nodemask; /* nodemask passed by user */
> > } w;
> > + struct rcu_head rcu;
> > };
> >
> > /*
> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > index 0e5175f1c767..6dc61a3d4a32 100644
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -487,7 +487,7 @@ void __mpol_put(struct mempolicy *pol)
> > {
> > if (!atomic_dec_and_test(&pol->refcnt))
> > return;
> > - kmem_cache_free(policy_cache, pol);
> > + kfree_rcu(pol, rcu);
> > }
> > EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
>
> While this looks functionally correct it is incomplete in terms of RCU.
>
> The vma->vm_policy pointer needs to be marked __rcu. That then requires
> to use rcu_dereference_check() at the reader side and
> rcu_assign_pointer() and rcu_replace_pointer() on the writer side.
I hate that sparse annotation; it mostly just makes the code unreadable
for then requiring those unwieldy rcu helper functions.
Not to mention we don't actually need any of that here, because:
> Especially the writer side is required so that the proper memory
> barriers are inserted for architectures with a weakly ordered memory
> model.
The vma->vm_policy thing is written under mmap_lock held for writing,
and the futex consumer is a speculative read lock. Specifically the
ordering is through the associated seqcount.
All that is really needed is to extend the lifetime of the mpol to the
associated RCU period. Which is exactly what this patch does.
Want me to go write up a better Changelog?
On Tue, Mar 24 2026 at 15:00, Peter Zijlstra wrote:
> On Mon, Mar 23, 2026 at 06:24:42PM +0100, Thomas Gleixner wrote:
> Not to mention we don't actually need any of that here, because:
>
>> Especially the writer side is required so that the proper memory
>> barriers are inserted for architectures with a weakly ordered memory
>> model.
>
> The vma->vm_policy thing is written under mmap_lock held for writing,
> and the futex consumer is a speculative read lock. Specifically the
> ordering is through the associated seqcount.
Duh. Yes.
> All that is really needed is to extend the lifetime of the mpol to the
> associated RCU period. Which is exactly what this patch does.
>
> Want me to go write up a better Changelog?
And a comment in the code explaining the RCU magic perhaps?
On Tue, Mar 24, 2026 at 05:36:42PM +0100, Thomas Gleixner wrote:
> On Tue, Mar 24 2026 at 15:00, Peter Zijlstra wrote:
> > On Mon, Mar 23, 2026 at 06:24:42PM +0100, Thomas Gleixner wrote:
> > Not to mention we don't actually need any of that here, because:
> >
> >> Especially the writer side is required so that the proper memory
> >> barriers are inserted for architectures with a weakly ordered memory
> >> model.
> >
> > The vma->vm_policy thing is written under mmap_lock held for writing,
> > and the futex consumer is a speculative read lock. Specifically the
> > ordering is through the associated seqcount.
>
> Duh. Yes.
>
> > All that is really needed is to extend the lifetime of the mpol to the
> > associated RCU period. Which is exactly what this patch does.
> >
> > Want me to go write up a better Changelog?
>
> And a comment in the code explaining the RCU magic perhaps?
Does this work for you?
---
Subject: futex: Fix UaF between futex_key_to_node_opt() and vma_replace_policy()
From: Hao-Yu Yang <naup96721@gmail.com>
Date: Fri, 13 Mar 2026 20:47:56 +0800
From: Hao-Yu Yang <naup96721@gmail.com>
During futex_key_to_node_opt() execution, vma->vm_policy is read under
speculative mmap lock and RCU. Concurrently, mbind() may call
vma_replace_policy() which frees the old mempolicy immediately via
kmem_cache_free().
This creates a race where __futex_key_to_node() dereferences a freed
mempolicy pointer, causing a use-after-free read of mpol->mode.
[ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
[ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
[ 151.415969] Call Trace:
[ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
[ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
[ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
Fix by adding rcu to __mpol_put().
Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
include/linux/mempolicy.h | 1 +
mm/mempolicy.c | 8 +++++++-
2 files changed, 8 insertions(+), 1 deletion(-)
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -55,6 +55,7 @@ struct mempolicy {
nodemask_t cpuset_mems_allowed; /* relative to these nodes */
nodemask_t user_nodemask; /* nodemask passed by user */
} w;
+ struct rcu_head rcu;
};
/*
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -487,7 +487,13 @@ void __mpol_put(struct mempolicy *pol)
{
if (!atomic_dec_and_test(&pol->refcnt))
return;
- kmem_cache_free(policy_cache, pol);
+ /*
+ * Required to allow mmap_lock_speculate_*() access, see for example
+ * futex_key_to_node_opt(). All accesses are serialized by mmap_lock,
+ * however the speculative lock section is not bound by the normal lock
+ * boundaries, requiring RCU freeing.
+ */
+ kfree_rcu(pol, rcu);
}
EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
On 3/24/26 18:44, Peter Zijlstra wrote:
> On Tue, Mar 24, 2026 at 05:36:42PM +0100, Thomas Gleixner wrote:
>> On Tue, Mar 24 2026 at 15:00, Peter Zijlstra wrote:
>>> Not to mention we don't actually need any of that here, because:
>>>
>>>
>>> The vma->vm_policy thing is written under mmap_lock held for writing,
>>> and the futex consumer is a speculative read lock. Specifically the
>>> ordering is through the associated seqcount.
>>
>> Duh. Yes.
>>
>>> All that is really needed is to extend the lifetime of the mpol to the
>>> associated RCU period. Which is exactly what this patch does.
>>>
>>> Want me to go write up a better Changelog?
>>
>> And a comment in the code explaining the RCU magic perhaps?
>
> Does this work for you?
>
CCing Lorenzo and Liam.
> ---
> Subject: futex: Fix UaF between futex_key_to_node_opt() and vma_replace_policy()
> From: Hao-Yu Yang <naup96721@gmail.com>
> Date: Fri, 13 Mar 2026 20:47:56 +0800
>
> From: Hao-Yu Yang <naup96721@gmail.com>
>
> During futex_key_to_node_opt() execution, vma->vm_policy is read under
> speculative mmap lock and RCU. Concurrently, mbind() may call
> vma_replace_policy() which frees the old mempolicy immediately via
> kmem_cache_free().
>
> This creates a race where __futex_key_to_node() dereferences a freed
> mempolicy pointer, causing a use-after-free read of mpol->mode.
>
> [ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
>
> [ 151.415969] Call Trace:
>
> [ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
> [ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
>
> Fix by adding rcu to __mpol_put().
>
> Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
> Reported-by: Hao-Yu Yang <naup96721@gmail.com>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
> include/linux/mempolicy.h | 1 +
> mm/mempolicy.c | 8 +++++++-
> 2 files changed, 8 insertions(+), 1 deletion(-)
>
> --- a/include/linux/mempolicy.h
> +++ b/include/linux/mempolicy.h
> @@ -55,6 +55,7 @@ struct mempolicy {
> nodemask_t cpuset_mems_allowed; /* relative to these nodes */
> nodemask_t user_nodemask; /* nodemask passed by user */
> } w;
> + struct rcu_head rcu;
> };
>
> /*
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -487,7 +487,13 @@ void __mpol_put(struct mempolicy *pol)
> {
> if (!atomic_dec_and_test(&pol->refcnt))
> return;
> - kmem_cache_free(policy_cache, pol);
> + /*
> + * Required to allow mmap_lock_speculative*() access, see for example
> + * futex_key_to_node_opt(). All accesses are serialized by mmap_lock,
> + * however the speculative lock section unbound by the normal lock
> + * boundaries, requiring RCU freeing.
> + */
> + kfree_rcu(pol, rcu);
> }
> EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
>
So IIUC, futex_key_to_node_opt() looks up a VMA under RCU, without
holding the mmap lock. Concurrent mmap-write lock is detected by using
the mmap_lock_speculate_try_begin()/mmap_lock_speculate_retry() seqcount.
After looking up the VMA, we access the VMA policy.
vma_policy() does a straight vma->vm_policy.
What prevents the compiler here from doing some load tearing while it is
getting modified by mbind()? Or what stops the writer side from doing some
store tearing?
Shouldn't we be using at least READ_ONCE/WRITE_ONCE() etc?
--
Cheers,
David
On Tue, Mar 24, 2026 at 09:27:41PM +0100, David Hildenbrand (Arm) wrote: > So IIUC, futex_key_to_node_opt() looks up a VMA under RCU, without > holding the mmap lock. Concurrent mmap-write lock is detected by using > the mmap_lock_speculate_try_begin()/mmap_lock_speculate_retry() seqcount. > > After looking up the VMA, we access the VMA policy. > > vma_policy() does a straight vma->vm_policy. > > What prevents the compiler here to do some load tearing while it is > getting modified by mbind()? Or what stops the writer side to to some > store tearing? > > Shouldn't we be using at least READ_ONCE/WRITE_ONCE() etc? Bah, at that point we might as well RCU the thing like so, I suppose. --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1026,7 +1026,7 @@ static int vma_replace_policy(struct vm_ } old = vma->vm_policy; - vma->vm_policy = new; /* protected by mmap_lock */ + rcu_assign_pointer(vma->vm_policy, new); /* protected by mmap_lock */ mpol_put(old); return 0; diff --git a/kernel/futex/core.c b/kernel/futex/core.c index 4bacf5565368..6336a80e3dca 100644 --- a/kernel/futex/core.c +++ b/kernel/futex/core.c @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr) if (!vma) return FUTEX_NO_NODE; - mpol = vma_policy(vma); + mpol = rcu_dereference_raw(vma->vm_policy); if (!mpol) return FUTEX_NO_NODE;
On Wed, Mar 25, 2026 at 8:14 AM Peter Zijlstra <peterz@infradead.org> wrote: > > On Tue, Mar 24, 2026 at 09:27:41PM +0100, David Hildenbrand (Arm) wrote: > > So IIUC, futex_key_to_node_opt() looks up a VMA under RCU, without > > holding the mmap lock. Concurrent mmap-write lock is detected by using > > the mmap_lock_speculate_try_begin()/mmap_lock_speculate_retry() seqcount. > > > > After looking up the VMA, we access the VMA policy. > > > > vma_policy() does a straight vma->vm_policy. > > > > What prevents the compiler here to do some load tearing while it is > > getting modified by mbind()? Or what stops the writer side to to some > > store tearing? > > > > Shouldn't we be using at least READ_ONCE/WRITE_ONCE() etc? > > Bah, at that point we might as well RCU the thing like so, I suppose. > > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -1026,7 +1026,7 @@ static int vma_replace_policy(struct vm_ > } > > old = vma->vm_policy; > - vma->vm_policy = new; /* protected by mmap_lock */ > + rcu_assign_pointer(vma->vm_policy, new); /* protected by mmap_lock */ > mpol_put(old); > > return 0; > diff --git a/kernel/futex/core.c b/kernel/futex/core.c > index 4bacf5565368..6336a80e3dca 100644 > --- a/kernel/futex/core.c > +++ b/kernel/futex/core.c > @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr) > if (!vma) > return FUTEX_NO_NODE; > > - mpol = vma_policy(vma); > + mpol = rcu_dereference_raw(vma->vm_policy); > if (!mpol) > return FUTEX_NO_NODE; Yes, but sparse will bite :) READ_ONCE()/WRITE_ONCE() on these two locations seems acceptable.
On Wed, Mar 25, 2026 at 08:19:24AM -0700, Eric Dumazet wrote: > On Wed, Mar 25, 2026 at 8:14 AM Peter Zijlstra <peterz@infradead.org> wrote: > > > > On Tue, Mar 24, 2026 at 09:27:41PM +0100, David Hildenbrand (Arm) wrote: > > > So IIUC, futex_key_to_node_opt() looks up a VMA under RCU, without > > > holding the mmap lock. Concurrent mmap-write lock is detected by using > > > the mmap_lock_speculate_try_begin()/mmap_lock_speculate_retry() seqcount. > > > > > > After looking up the VMA, we access the VMA policy. > > > > > > vma_policy() does a straight vma->vm_policy. > > > > > > What prevents the compiler here to do some load tearing while it is > > > getting modified by mbind()? Or what stops the writer side to to some > > > store tearing? > > > > > > Shouldn't we be using at least READ_ONCE/WRITE_ONCE() etc? > > > > Bah, at that point we might as well RCU the thing like so, I suppose. > > > > --- a/mm/mempolicy.c > > +++ b/mm/mempolicy.c > > @@ -1026,7 +1026,7 @@ static int vma_replace_policy(struct vm_ > > } > > > > old = vma->vm_policy; > > - vma->vm_policy = new; /* protected by mmap_lock */ > > + rcu_assign_pointer(vma->vm_policy, new); /* protected by mmap_lock */ > > mpol_put(old); > > > > return 0; > > diff --git a/kernel/futex/core.c b/kernel/futex/core.c > > index 4bacf5565368..6336a80e3dca 100644 > > --- a/kernel/futex/core.c > > +++ b/kernel/futex/core.c > > @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr) > > if (!vma) > > return FUTEX_NO_NODE; > > > > - mpol = vma_policy(vma); > > + mpol = rcu_dereference_raw(vma->vm_policy); > > if (!mpol) > > return FUTEX_NO_NODE; > > Yes, but sparse will bite :) Oh gawd, yes, and then people will go 'fix' it and it'll turn into an unholy mess. > READ_ONCE()/WRITE_ONCE() on these two locations seems acceptable. Fair enough. Like so then.. 
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm
if (!vma)
return FUTEX_NO_NODE;
- mpol = vma_policy(vma);
+ mpol = READ_ONCE(vma->vm_policy);
if (!mpol)
return FUTEX_NO_NODE;
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1026,7 +1026,7 @@ static int vma_replace_policy(struct vm_
}
old = vma->vm_policy;
- vma->vm_policy = new; /* protected by mmap_lock */
+ WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */
mpol_put(old);
return 0;
On Wed, Mar 25, 2026 at 8:22 AM Peter Zijlstra <peterz@infradead.org> wrote: > Fair enough. Like so then.. > > --- a/kernel/futex/core.c > +++ b/kernel/futex/core.c > @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm > if (!vma) > return FUTEX_NO_NODE; > > - mpol = vma_policy(vma); > + mpol = READ_ONCE(vma->vm_policy); > if (!mpol) > return FUTEX_NO_NODE; > > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -1026,7 +1026,7 @@ static int vma_replace_policy(struct vm_ > } > > old = vma->vm_policy; > - vma->vm_policy = new; /* protected by mmap_lock */ > + WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */ > mpol_put(old); > > return 0; LGTM, thanks ! Reviewed-by: Eric Dumazet <edumazet@google.com>
Do I need to send a v3 patch? If so, what do I need to change in this patch?
On Wed, Mar 25, 2026 at 08:25:29AM -0700, Eric Dumazet wrote:
> On Wed, Mar 25, 2026 at 8:22 AM Peter Zijlstra <peterz@infradead.org> wrote:
>
> > Fair enough. Like so then..
> >
> > --- a/kernel/futex/core.c
> > +++ b/kernel/futex/core.c
> > @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm
> > if (!vma)
> > return FUTEX_NO_NODE;
> >
> > - mpol = vma_policy(vma);
> > + mpol = READ_ONCE(vma->vm_policy);
> > if (!mpol)
> > return FUTEX_NO_NODE;
> >
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -1026,7 +1026,7 @@ static int vma_replace_policy(struct vm_
> > }
> >
> > old = vma->vm_policy;
> > - vma->vm_policy = new; /* protected by mmap_lock */
> > + WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */
> > mpol_put(old);
> >
> > return 0;
>
> LGTM, thanks !
>
> Reviewed-by: Eric Dumazet <edumazet@google.com>
On 3/25/26 16:22, Peter Zijlstra wrote: > On Wed, Mar 25, 2026 at 08:19:24AM -0700, Eric Dumazet wrote: >> On Wed, Mar 25, 2026 at 8:14 AM Peter Zijlstra <peterz@infradead.org> wrote: >>> >>> >>> Bah, at that point we might as well RCU the thing like so, I suppose. >>> >>> --- a/mm/mempolicy.c >>> +++ b/mm/mempolicy.c >>> @@ -1026,7 +1026,7 @@ static int vma_replace_policy(struct vm_ >>> } >>> >>> old = vma->vm_policy; >>> - vma->vm_policy = new; /* protected by mmap_lock */ >>> + rcu_assign_pointer(vma->vm_policy, new); /* protected by mmap_lock */ >>> mpol_put(old); >>> >>> return 0; >>> diff --git a/kernel/futex/core.c b/kernel/futex/core.c >>> index 4bacf5565368..6336a80e3dca 100644 >>> --- a/kernel/futex/core.c >>> +++ b/kernel/futex/core.c >>> @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr) >>> if (!vma) >>> return FUTEX_NO_NODE; >>> >>> - mpol = vma_policy(vma); >>> + mpol = rcu_dereference_raw(vma->vm_policy); >>> if (!mpol) >>> return FUTEX_NO_NODE; >> >> Yes, but sparse will bite :) > > Oh gawd, yes, and then people will go 'fix' it and it'll turn into an > unholy mess. > >> READ_ONCE()/WRITE_ONCE() on these two locations seems acceptable. > > Fair enough. Like so then.. > > --- a/kernel/futex/core.c > +++ b/kernel/futex/core.c > @@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm > if (!vma) > return FUTEX_NO_NODE; > > - mpol = vma_policy(vma); > + mpol = READ_ONCE(vma->vm_policy); > if (!mpol) > return FUTEX_NO_NODE; > > --- a/mm/mempolicy.c > +++ b/mm/mempolicy.c > @@ -1026,7 +1026,7 @@ static int vma_replace_policy(struct vm_ > } > > old = vma->vm_policy; > - vma->vm_policy = new; /* protected by mmap_lock */ > + WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */ > mpol_put(old); > > return 0; LGTM, feel free to add my Acked-by: David Hildenbrand (Arm) <david@kernel.org> -- Cheers, David
On Tue, Mar 24 2026 at 18:44, Peter Zijlstra wrote:
> On Tue, Mar 24, 2026 at 05:36:42PM +0100, Thomas Gleixner wrote:
>> On Tue, Mar 24 2026 at 15:00, Peter Zijlstra wrote:
>> > On Mon, Mar 23, 2026 at 06:24:42PM +0100, Thomas Gleixner wrote:
>> > Not to mention we don't actually need any of that here, because:
>> >
>> >> Especially the writer side is required so that the proper memory
>> >> barriers are inserted for architectures with a weakly ordered memory
>> >> model.
>> >
>> > The vma->vm_policy thing is written under mmap_lock held for writing,
>> > and the futex consumer is a speculative read lock. Specifically the
>> > ordering is through the associated seqcount.
>>
>> Duh. Yes.
>>
>> > All that is really needed is to extend the lifetime of the mpol to the
>> > associated RCU period. Which is exactly what this patch does.
>> >
>> > Want me to go write up a better Changelog?
>>
>> And a comment in the code explaining the RCU magic perhaps?
>
> Does this work for you?
Perfect
> ---
> Subject: futex: Fix UaF between futex_key_to_node_opt() and vma_replace_policy()
> From: Hao-Yu Yang <naup96721@gmail.com>
> Date: Fri, 13 Mar 2026 20:47:56 +0800
>
> From: Hao-Yu Yang <naup96721@gmail.com>
>
> During futex_key_to_node_opt() execution, vma->vm_policy is read under
> speculative mmap lock and RCU. Concurrently, mbind() may call
> vma_replace_policy() which frees the old mempolicy immediately via
> kmem_cache_free().
>
> This creates a race where __futex_key_to_node() dereferences a freed
> mempolicy pointer, causing a use-after-free read of mpol->mode.
>
> [ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
>
> [ 151.415969] Call Trace:
>
> [ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
> [ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
>
> Fix by adding rcu to __mpol_put().
>
> Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
> Reported-by: Hao-Yu Yang <naup96721@gmail.com>
> Suggested-by: Eric Dumazet <edumazet@google.com>
> Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Thomas Gleixner <tglx@kernel.org>
I think there was also a Reviewed-by from Eric in one of the previous threads.
Thanks,
tglx
The following commit has been merged into the locking/urgent branch of tip:
Commit-ID: 190a8c48ff623c3d67cb295b4536a660db2012aa
Gitweb: https://git.kernel.org/tip/190a8c48ff623c3d67cb295b4536a660db2012aa
Author: Hao-Yu Yang <naup96721@gmail.com>
AuthorDate: Fri, 13 Mar 2026 20:47:56 +08:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 26 Mar 2026 16:13:48 +01:00
futex: Fix UaF between futex_key_to_node_opt() and vma_replace_policy()
During futex_key_to_node_opt() execution, vma->vm_policy is read under
speculative mmap lock and RCU. Concurrently, mbind() may call
vma_replace_policy() which frees the old mempolicy immediately via
kmem_cache_free().
This creates a race where __futex_key_to_node() dereferences a freed
mempolicy pointer, causing a use-after-free read of mpol->mode.
[ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
[ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
[ 151.415969] Call Trace:
[ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
[ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
[ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
Fix by adding rcu to __mpol_put().
Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
Reported-by: Hao-Yu Yang <naup96721@gmail.com>
Suggested-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Acked-by: David Hildenbrand (Arm) <david@kernel.org>
Link: https://patch.msgid.link/20260324174418.GB1850007@noisy.programming.kicks-ass.net
---
include/linux/mempolicy.h | 1 +
kernel/futex/core.c | 2 +-
mm/mempolicy.c | 10 ++++++++--
3 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 0fe96f3..65c732d 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -55,6 +55,7 @@ struct mempolicy {
nodemask_t cpuset_mems_allowed; /* relative to these nodes */
nodemask_t user_nodemask; /* nodemask passed by user */
} w;
+ struct rcu_head rcu;
};
/*
diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index cf7e610..31e83a0 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -342,7 +342,7 @@ static int __futex_key_to_node(struct mm_struct *mm, unsigned long addr)
if (!vma)
return FUTEX_NO_NODE;
- mpol = vma_policy(vma);
+ mpol = READ_ONCE(vma->vm_policy);
if (!mpol)
return FUTEX_NO_NODE;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 0e5175f..cf92bd6 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -487,7 +487,13 @@ void __mpol_put(struct mempolicy *pol)
{
if (!atomic_dec_and_test(&pol->refcnt))
return;
- kmem_cache_free(policy_cache, pol);
+ /*
+ * Required to allow mmap_lock_speculate_*() access, see for example
+ * futex_key_to_node_opt(). All accesses are serialized by mmap_lock,
+ * however the speculative lock section is not bound by the normal lock
+ * boundaries, requiring RCU freeing.
+ */
+ kfree_rcu(pol, rcu);
}
EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
@@ -1020,7 +1026,7 @@ static int vma_replace_policy(struct vm_area_struct *vma,
}
old = vma->vm_policy;
- vma->vm_policy = new; /* protected by mmap_lock */
+ WRITE_ONCE(vma->vm_policy, new); /* protected by mmap_lock */
mpol_put(old);
return 0;
So this patch is correct? What do I need to do?
On Tue, Mar 24, 2026 at 03:00:19PM +0100, Peter Zijlstra wrote:
> On Mon, Mar 23, 2026 at 06:24:42PM +0100, Thomas Gleixner wrote:
>
> > > include/linux/mempolicy.h | 1 +
> > > mm/mempolicy.c | 2 +-
> > > 2 files changed, 2 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
> > > index 0fe96f3ab3ef..65c732d440d2 100644
> > > --- a/include/linux/mempolicy.h
> > > +++ b/include/linux/mempolicy.h
> > > @@ -55,6 +55,7 @@ struct mempolicy {
> > > nodemask_t cpuset_mems_allowed; /* relative to these nodes */
> > > nodemask_t user_nodemask; /* nodemask passed by user */
> > > } w;
> > > + struct rcu_head rcu;
> > > };
> > >
> > > /*
> > > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > > index 0e5175f1c767..6dc61a3d4a32 100644
> > > --- a/mm/mempolicy.c
> > > +++ b/mm/mempolicy.c
> > > @@ -487,7 +487,7 @@ void __mpol_put(struct mempolicy *pol)
> > > {
> > > if (!atomic_dec_and_test(&pol->refcnt))
> > > return;
> > > - kmem_cache_free(policy_cache, pol);
> > > + kfree_rcu(pol, rcu);
> > > }
> > > EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
> >
> > While this looks functionally correct it is incomplete in terms of RCU.
> >
> > The vma->vm_policy pointer needs to be marked __rcu. That then requires
> > to use rcu_dereference_check() at the reader side and
> > rcu_assign_pointer() and rcu_replace_pointer() on the writer side.
>
> I hate that sparse annotation; it mostly just makes the code unreadable
> for then requiring those unwieldy rcu helper functions.
>
> Not to mention we don't actually need any of that here, because:
>
> > Especially the writer side is required so that the proper memory
> > barriers are inserted for architectures with a weakly ordered memory
> > model.
>
> The vma->vm_policy thing is written under mmap_lock held for writing,
> and the futex consumer is a speculative read lock. Specifically the
> ordering is through the associated seqcount.
>
> All that is really needed is to extend the lifetime of the mpol to the
> associated RCU period. Which is exactly what this patch does.
>
> Want me to go write up a better Changelog?
Sorry, I'm not familiar with RCU and mempolicy. I don't know how to patch this part correctly.
On Mon, Mar 23, 2026 at 06:24:42PM +0100, Thomas Gleixner wrote:
> Hao-Yu!
>
> On Fri, Mar 13 2026 at 20:47, Hao-Yu Yang wrote:
>
> I've removed the security list as this is public already.
> Also added the mm list and the maintainers. While it fixes the futex
> problem it is a change to the MM subsystem, so those people need to be
> involved.
>
> > During futex_key_to_node_opt() execution, vma->vm_policy is read under
> > speculative mmap lock and RCU. Concurrently, mbind() may call
> > vma_replace_policy() which frees the old mempolicy immediately via
> > kmem_cache_free().
> >
> > This creates a race where __futex_key_to_node() dereferences a freed
> > mempolicy pointer, causing a use-after-free read of mpol->mode.
>
> > [ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
> > [ 151.414476]
> > [ 151.415431] CPU: 1 UID: 1000 PID: 87 Comm: e Not tainted 7.0.0-rc3-g0257f64bdac7 #1 PREEMPT(lazy)
> > [ 151.415758] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
> > [ 151.415969] Call Trace:
> > [ 151.416059] <TASK>
> > [ 151.416161] dump_stack_lvl (lib/dump_stack.c:123)
> > [ 151.416299] print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
> > [ 151.416359] ? __virt_addr_valid (./include/linux/mmzone.h:2046 ./include/linux/mmzone.h:2198 arch/x86/mm/physaddr.c:54)
> > [ 151.416412] ? __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.416517] ? kasan_complete_mode_report_info (mm/kasan/report_generic.c:182)
> > [ 151.416583] ? __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.416631] kasan_report (mm/kasan/report.c:597)
> > [ 151.416677] ? __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
> > [ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
> > [ 151.416871] ? __pfx_get_futex_key (kernel/futex/core.c:550)
> > [ 151.416927] futex_wake (kernel/futex/waitwake.c:165)
> > [ 151.416976] ? __pfx_futex_wake (kernel/futex/waitwake.c:156)
> > [ 151.417022] ? __pfx___x64_sys_futex_wait (kernel/futex/syscalls.c:398)
> > [ 151.417081] __x64_sys_futex_wake (kernel/futex/syscalls.c:382 kernel/futex/syscalls.c:366 kernel/futex/syscalls.c:366)
> > [ 151.417129] x64_sys_call (arch/x86/entry/syscall_64.c:41)
> > [ 151.417236] do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
> > [ 151.417342] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
> > [ 151.418312] </TASK>
>
> Please trim the backtrace so it only contains the real important
> information.
>
> https://docs.kernel.org/process/submitting-patches.html#backtraces-in-commit-messages
>
> > Fix by adding rcu to __mpol_put().
> >
> > change-log:
> > v2-v1: add rcu to __mpol_put
>
> The change history is not part of the change log, it wants to be placed
> after the --- separator.
>
> > Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
> > Reported-by: Hao-Yu Yang <naup96721@gmail.com>
> > Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
>
> This should have a
>
> Suggested-by: Eric Dumazet <edumazet@google.com>
>
> tag.
>
> > ---
> > include/linux/mempolicy.h | 1 +
> > mm/mempolicy.c | 2 +-
> > 2 files changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
> > index 0fe96f3ab3ef..65c732d440d2 100644
> > --- a/include/linux/mempolicy.h
> > +++ b/include/linux/mempolicy.h
> > @@ -55,6 +55,7 @@ struct mempolicy {
> > nodemask_t cpuset_mems_allowed; /* relative to these nodes */
> > nodemask_t user_nodemask; /* nodemask passed by user */
> > } w;
> > + struct rcu_head rcu;
> > };
> >
> > /*
> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > index 0e5175f1c767..6dc61a3d4a32 100644
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -487,7 +487,7 @@ void __mpol_put(struct mempolicy *pol)
> > {
> > if (!atomic_dec_and_test(&pol->refcnt))
> > return;
> > - kmem_cache_free(policy_cache, pol);
> > + kfree_rcu(pol, rcu);
> > }
> > EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
>
> While this looks functionally correct it is incomplete in terms of RCU.
>
> The vma->vm_policy pointer needs to be marked __rcu. That then requires
> to use rcu_dereference_check() at the reader side and
> rcu_assign_pointer() and rcu_replace_pointer() on the writer side.
>
> Especially the writer side is required so that the proper memory
> barriers are inserted for architectures with a weakly ordered memory
> model.
>
> Thanks,
>
> tglx
OK, so now I need to submit a v3 patch?
On Mon, Mar 23, 2026 at 06:24:42PM +0100, Thomas Gleixner wrote:
> Hao-Yu!
>
> On Fri, Mar 13 2026 at 20:47, Hao-Yu Yang wrote:
>
> I've removed the security list as this is public already.
> Also added the mm list and the maintainers. While it fixes the futex
> problem it is a change to the MM subsystem, so those people need to be
> involved.
>
> > During futex_key_to_node_opt() execution, vma->vm_policy is read under
> > speculative mmap lock and RCU. Concurrently, mbind() may call
> > vma_replace_policy() which frees the old mempolicy immediately via
> > kmem_cache_free().
> >
> > This creates a race where __futex_key_to_node() dereferences a freed
> > mempolicy pointer, causing a use-after-free read of mpol->mode.
>
> > [ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
> > [ 151.414476]
> > [ 151.415431] CPU: 1 UID: 1000 PID: 87 Comm: e Not tainted 7.0.0-rc3-g0257f64bdac7 #1 PREEMPT(lazy)
> > [ 151.415758] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
> > [ 151.415969] Call Trace:
> > [ 151.416059] <TASK>
> > [ 151.416161] dump_stack_lvl (lib/dump_stack.c:123)
> > [ 151.416299] print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
> > [ 151.416359] ? __virt_addr_valid (./include/linux/mmzone.h:2046 ./include/linux/mmzone.h:2198 arch/x86/mm/physaddr.c:54)
> > [ 151.416412] ? __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.416517] ? kasan_complete_mode_report_info (mm/kasan/report_generic.c:182)
> > [ 151.416583] ? __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.416631] kasan_report (mm/kasan/report.c:597)
> > [ 151.416677] ? __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
> > [ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
> > [ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
> > [ 151.416871] ? __pfx_get_futex_key (kernel/futex/core.c:550)
> > [ 151.416927] futex_wake (kernel/futex/waitwake.c:165)
> > [ 151.416976] ? __pfx_futex_wake (kernel/futex/waitwake.c:156)
> > [ 151.417022] ? __pfx___x64_sys_futex_wait (kernel/futex/syscalls.c:398)
> > [ 151.417081] __x64_sys_futex_wake (kernel/futex/syscalls.c:382 kernel/futex/syscalls.c:366 kernel/futex/syscalls.c:366)
> > [ 151.417129] x64_sys_call (arch/x86/entry/syscall_64.c:41)
> > [ 151.417236] do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
> > [ 151.417342] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
> > [ 151.418312] </TASK>
>
> Please trim the backtrace so it only contains the real important
> information.
>
> https://docs.kernel.org/process/submitting-patches.html#backtraces-in-commit-messages
>
> > Fix by adding rcu to __mpol_put().
> >
> > change-log:
> > v2-v1: add rcu to __mpol_put
>
> The change history is not part of the change log, it wants to be placed
> after the --- separator.
>
> > Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
> > Reported-by: Hao-Yu Yang <naup96721@gmail.com>
> > Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
>
> This should have a
>
> Suggested-by: Eric Dumazet <edumazet@google.com>
>
> tag.
>
> > ---
> > include/linux/mempolicy.h | 1 +
> > mm/mempolicy.c | 2 +-
> > 2 files changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
> > index 0fe96f3ab3ef..65c732d440d2 100644
> > --- a/include/linux/mempolicy.h
> > +++ b/include/linux/mempolicy.h
> > @@ -55,6 +55,7 @@ struct mempolicy {
> > nodemask_t cpuset_mems_allowed; /* relative to these nodes */
> > nodemask_t user_nodemask; /* nodemask passed by user */
> > } w;
> > + struct rcu_head rcu;
> > };
> >
> > /*
> > diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> > index 0e5175f1c767..6dc61a3d4a32 100644
> > --- a/mm/mempolicy.c
> > +++ b/mm/mempolicy.c
> > @@ -487,7 +487,7 @@ void __mpol_put(struct mempolicy *pol)
> > {
> > if (!atomic_dec_and_test(&pol->refcnt))
> > return;
> > - kmem_cache_free(policy_cache, pol);
> > + kfree_rcu(pol, rcu);
> > }
> > EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
>
> While this looks functionally correct it is incomplete in terms of RCU.
>
> The vma->vm_policy pointer needs to be marked __rcu. That then requires
> to use rcu_dereference_check() at the reader side and
> rcu_assign_pointer() and rcu_replace_pointer() on the writer side.
>
> Especially the writer side is required so that the proper memory
> barriers are inserted for architectures with a weakly ordered memory
> model.
>
> Thanks,
>
> tglx
On Fri, Mar 13, 2026 at 08:47:56PM +0800, Hao-Yu Yang wrote:
> During futex_key_to_node_opt() execution, vma->vm_policy is read under
> speculative mmap lock and RCU. Concurrently, mbind() may call
> vma_replace_policy() which frees the old mempolicy immediately via
> kmem_cache_free().
>
> This creates a race where __futex_key_to_node() dereferences a freed
> mempolicy pointer, causing a use-after-free read of mpol->mode.
>
> [ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
> [ 151.414476]
> [ 151.415431] CPU: 1 UID: 1000 PID: 87 Comm: e Not tainted 7.0.0-rc3-g0257f64bdac7 #1 PREEMPT(lazy)
> [ 151.415758] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
> [ 151.415969] Call Trace:
> [ 151.416059] <TASK>
> [ 151.416161] dump_stack_lvl (lib/dump_stack.c:123)
> [ 151.416299] print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
> [ 151.416359] ? __virt_addr_valid (./include/linux/mmzone.h:2046 ./include/linux/mmzone.h:2198 arch/x86/mm/physaddr.c:54)
> [ 151.416412] ? __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416517] ? kasan_complete_mode_report_info (mm/kasan/report_generic.c:182)
> [ 151.416583] ? __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416631] kasan_report (mm/kasan/report.c:597)
> [ 151.416677] ? __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
> [ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
> [ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
> [ 151.416871] ? __pfx_get_futex_key (kernel/futex/core.c:550)
> [ 151.416927] futex_wake (kernel/futex/waitwake.c:165)
> [ 151.416976] ? __pfx_futex_wake (kernel/futex/waitwake.c:156)
> [ 151.417022] ? __pfx___x64_sys_futex_wait (kernel/futex/syscalls.c:398)
> [ 151.417081] __x64_sys_futex_wake (kernel/futex/syscalls.c:382 kernel/futex/syscalls.c:366 kernel/futex/syscalls.c:366)
> [ 151.417129] x64_sys_call (arch/x86/entry/syscall_64.c:41)
> [ 151.417236] do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
> [ 151.417342] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
> [ 151.418312] </TASK>
>
> Fix by adding rcu to __mpol_put().
>
> change-log:
> v2-v1: add rcu to __mpol_put
>
> Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
> Reported-by: Hao-Yu Yang <naup96721@gmail.com>
> Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
> ---
> include/linux/mempolicy.h | 1 +
> mm/mempolicy.c | 2 +-
> 2 files changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
> index 0fe96f3ab3ef..65c732d440d2 100644
> --- a/include/linux/mempolicy.h
> +++ b/include/linux/mempolicy.h
> @@ -55,6 +55,7 @@ struct mempolicy {
> nodemask_t cpuset_mems_allowed; /* relative to these nodes */
> nodemask_t user_nodemask; /* nodemask passed by user */
> } w;
> + struct rcu_head rcu;
> };
>
> /*
> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
> index 0e5175f1c767..6dc61a3d4a32 100644
> --- a/mm/mempolicy.c
> +++ b/mm/mempolicy.c
> @@ -487,7 +487,7 @@ void __mpol_put(struct mempolicy *pol)
> {
> if (!atomic_dec_and_test(&pol->refcnt))
> return;
> - kmem_cache_free(policy_cache, pol);
> + kfree_rcu(pol, rcu);
> }
> EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
>
> --
> 2.34.1
>
Hi, I’d like to kindly ask if there’s an update on when this patch might be merged.
On Sun, Mar 22 2026 at 07:57, Hao-Yu Yang wrote:
> On Fri, Mar 13, 2026 at 08:47:56PM +0800, Hao-Yu Yang wrote:
>> During futex_key_to_node_opt() execution, vma->vm_policy is read under
>> speculative mmap lock and RCU. Concurrently, mbind() may call
>> vma_replace_policy() which frees the old mempolicy immediately via
>> kmem_cache_free().
>>
>> This creates a race where __futex_key_to_node() dereferences a freed
>> mempolicy pointer, causing a use-after-free read of mpol->mode.
>>
>> [ 151.412631] BUG: KASAN: slab-use-after-free in __futex_key_to_node (kernel/futex/core.c:349)
>> [ 151.414046] Read of size 2 at addr ffff888001c49634 by task e/87
>> [ 151.414476]
>> [ 151.415431] CPU: 1 UID: 1000 PID: 87 Comm: e Not tainted 7.0.0-rc3-g0257f64bdac7 #1 PREEMPT(lazy)
>> [ 151.415758] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
>> [ 151.415969] Call Trace:
>> [ 151.416059] <TASK>
>> [ 151.416161] dump_stack_lvl (lib/dump_stack.c:123)
>> [ 151.416299] print_report (mm/kasan/report.c:379 mm/kasan/report.c:482)
>> [ 151.416359] ? __virt_addr_valid (./include/linux/mmzone.h:2046 ./include/linux/mmzone.h:2198 arch/x86/mm/physaddr.c:54)
>> [ 151.416412] ? __futex_key_to_node (kernel/futex/core.c:349)
>> [ 151.416517] ? kasan_complete_mode_report_info (mm/kasan/report_generic.c:182)
>> [ 151.416583] ? __futex_key_to_node (kernel/futex/core.c:349)
>> [ 151.416631] kasan_report (mm/kasan/report.c:597)
>> [ 151.416677] ? __futex_key_to_node (kernel/futex/core.c:349)
>> [ 151.416732] __asan_load2 (mm/kasan/generic.c:271)
>> [ 151.416777] __futex_key_to_node (kernel/futex/core.c:349)
>> [ 151.416822] get_futex_key (kernel/futex/core.c:374 kernel/futex/core.c:386 kernel/futex/core.c:593)
>> [ 151.416871] ? __pfx_get_futex_key (kernel/futex/core.c:550)
>> [ 151.416927] futex_wake (kernel/futex/waitwake.c:165)
>> [ 151.416976] ? __pfx_futex_wake (kernel/futex/waitwake.c:156)
>> [ 151.417022] ? __pfx___x64_sys_futex_wait (kernel/futex/syscalls.c:398)
>> [ 151.417081] __x64_sys_futex_wake (kernel/futex/syscalls.c:382 kernel/futex/syscalls.c:366 kernel/futex/syscalls.c:366)
>> [ 151.417129] x64_sys_call (arch/x86/entry/syscall_64.c:41)
>> [ 151.417236] do_syscall_64 (arch/x86/entry/syscall_64.c:63 arch/x86/entry/syscall_64.c:94)
>> [ 151.417342] entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
>> [ 151.418312] </TASK>
>>
>> Fix by adding rcu to __mpol_put().
>>
>> change-log:
>> v2-v1: add rcu to __mpol_put
>>
>> Fixes: c042c505210d ("futex: Implement FUTEX2_MPOL")
>> Reported-by: Hao-Yu Yang <naup96721@gmail.com>
>> Signed-off-by: Hao-Yu Yang <naup96721@gmail.com>
>> ---
>> include/linux/mempolicy.h | 1 +
>> mm/mempolicy.c | 2 +-
>> 2 files changed, 2 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
>> index 0fe96f3ab3ef..65c732d440d2 100644
>> --- a/include/linux/mempolicy.h
>> +++ b/include/linux/mempolicy.h
>> @@ -55,6 +55,7 @@ struct mempolicy {
>> nodemask_t cpuset_mems_allowed; /* relative to these nodes */
>> nodemask_t user_nodemask; /* nodemask passed by user */
>> } w;
>> + struct rcu_head rcu;
>> };
>>
>> /*
>> diff --git a/mm/mempolicy.c b/mm/mempolicy.c
>> index 0e5175f1c767..6dc61a3d4a32 100644
>> --- a/mm/mempolicy.c
>> +++ b/mm/mempolicy.c
>> @@ -487,7 +487,7 @@ void __mpol_put(struct mempolicy *pol)
>> {
>> if (!atomic_dec_and_test(&pol->refcnt))
>> return;
>> - kmem_cache_free(policy_cache, pol);
>> + kfree_rcu(pol, rcu);
>> }
>> EXPORT_SYMBOL_FOR_MODULES(__mpol_put, "kvm");
>>
>> --
>> 2.34.1
>>
>
> Hi, I’d like to kindly ask if there’s an update on when this patch might be merged.
Thanks for the reminder. I'll take care of it tomorrow.
Thanks,
tglx
© 2016 - 2026 Red Hat, Inc.