kernel/bpf/task_iter.c | 50 +++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-)
bpf_find_vma() reads task->mm and calls mmap_read_trylock(mm) without
holding a reference on the mm. On a foreign task, a concurrent exit_mm()
can free the mm_struct between the lockless read and the trylock,
resulting in a use-after-free. mm_struct is not SLAB_TYPESAFE_BY_RCU.
For the current task, task->mm is stable. For a foreign task, pin the mm
under task->alloc_lock and release it with mmput_async(), mirroring commit
d8e27d2d22b6 ("bpf: fix mm lifecycle in open-coded task_vma iterator").
Use spin_trylock() instead of get_task_mm() so BPF context does not block
on alloc_lock. Reject irqs-disabled contexts and !CONFIG_MMU on the
foreign-task path because dropping the mm reference is not safe there.
Race:
CPU0 (BPF program)              CPU1 (exiting task)
============================    ==========================
bpf_find_vma(foreign_task):
  mm = task->mm
                                exit_mm():
                                  task->mm = NULL
                                  mmput(mm) -> frees mm_struct
  mmap_read_trylock(mm)
  // UAF on mm
Fixes: 7c7e3d31e785 ("bpf: Introduce helper bpf_find_vma")
Signed-off-by: Sanghyun Park <sanghyun.park.cnu@gmail.com>
---
v4:
- Use [PATCH bpf-next] subject as requested by Alexei.
- Add the missing BPF maintainers/reviewers to Cc.
v3: https://lore.kernel.org/bpf/20260609105216.3536839-1-sanghyun.park.cnu@gmail.com/
- Drop get_task_mm()+mmput(); mirror d8e27d2d22b6 with alloc_lock
trylock + mmput_async(). (Yonghong Song)
- Reject irqs-disabled contexts on the foreign-task path.
- Reject foreign-task path when !CONFIG_MMU: bpf_iter_mmput_async()
falls back to mmput() which may sleep, and bpf_find_vma() can run
in non-sleepable context.
- Shorten the foreign-task rationale comment and trim the changelog body.
- Fix v2's whitespace damage.
v2: https://lore.kernel.org/bpf/CAOrxSK5_7e4114VyfEU9htGi+UneuNt88fGVKOAa3_ZenPOFkA@mail.gmail.com/
kernel/bpf/task_iter.c | 50 +++++++++++++++++++++++++++++++++++-------
1 file changed, 42 insertions(+), 8 deletions(-)
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index fc5f463ca5..baee813290 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -754,12 +754,22 @@ static struct bpf_iter_reg task_vma_reg_info = {
.show_fdinfo = bpf_iter_task_show_fdinfo,
};
+static inline void bpf_iter_mmput_async(struct mm_struct *mm)
+{
+#ifdef CONFIG_MMU
+ mmput_async(mm);
+#else
+ mmput(mm);
+#endif
+}
+
BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
{
struct mmap_unlock_irq_work *work = NULL;
struct vm_area_struct *vma;
bool irq_work_busy = false;
+ bool mmput_needed = false;
struct mm_struct *mm;
int ret = -ENOENT;
@@ -769,14 +779,38 @@ BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
if (!task)
return -ENOENT;
- mm = task->mm;
+ if (task == current) {
+ mm = task->mm;
+ } else {
+ /*
+ * Foreign task: pin task->mm against a concurrent exit_mm().
+ * Use trylock on alloc_lock instead of get_task_mm()'s
+ * blocking task_lock() to avoid deadlocking the target task.
+ */
+ if (!IS_ENABLED(CONFIG_MMU))
+ return -EOPNOTSUPP;
+ if (irqs_disabled())
+ return -EBUSY;
+ if (!spin_trylock(&task->alloc_lock))
+ return -EBUSY;
+ mm = task->mm;
+ if (mm && !(task->flags & PF_KTHREAD)) {
+ mmget(mm);
+ mmput_needed = true;
+ } else {
+ mm = NULL;
+ }
+ spin_unlock(&task->alloc_lock);
+ }
if (!mm)
return -ENOENT;
irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
- if (irq_work_busy || !mmap_read_trylock(mm))
- return -EBUSY;
+ if (irq_work_busy || !mmap_read_trylock(mm)) {
+ ret = -EBUSY;
+ goto out;
+ }
vma = find_vma(mm, start);
@@ -786,6 +820,9 @@ BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
ret = 0;
}
bpf_mmap_unlock_mm(work, mm);
+out:
+ if (mmput_needed)
+ bpf_iter_mmput_async(mm);
return ret;
}
@@ -800,15 +837,6 @@ const struct bpf_func_proto bpf_find_vma_proto = {
.arg5_type = ARG_ANYTHING,
};
-static inline void bpf_iter_mmput_async(struct mm_struct *mm)
-{
-#ifdef CONFIG_MMU
- mmput_async(mm);
-#else
- mmput(mm);
-#endif
-}
-
struct bpf_iter_task_vma_kern_data {
struct task_struct *task;
struct mm_struct *mm;
--
2.48.1
> diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
> index e791ae065c39..77f90b887bf1 100644
> --- a/kernel/bpf/task_iter.c
> +++ b/kernel/bpf/task_iter.c
[ ... ]
> @@ -765,14 +775,38 @@ BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
> if (!task)
> return -ENOENT;
>
> - mm = task->mm;
> + if (task == current) {
> + mm = task->mm;
> + } else {
> + /*
> + * Foreign task: pin task->mm against a concurrent exit_mm().
> + * Use trylock on alloc_lock instead of get_task_mm()'s
> + * blocking task_lock() to avoid deadlocking the target task.
> + */
> + if (!IS_ENABLED(CONFIG_MMU))
> + return -EOPNOTSUPP;
> + if (irqs_disabled())
> + return -EBUSY;
> + if (!spin_trylock(&task->alloc_lock))
> + return -EBUSY;
> + mm = task->mm;
> + if (mm && !(task->flags & PF_KTHREAD)) {
> + mmget(mm);
> + mmput_needed = true;
> + } else {
> + mm = NULL;
> + }
> + spin_unlock(&task->alloc_lock);
> + }
> if (!mm)
> return -ENOENT;
>
> irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
>
> - if (irq_work_busy || !mmap_read_trylock(mm))
> - return -EBUSY;
> + if (irq_work_busy || !mmap_read_trylock(mm)) {
> + ret = -EBUSY;
> + goto out;
> + }
>
> vma = find_vma(mm, start);
^^^^^^^^
sashiko-bot@kernel.org raised a concern about this in v4: bpf_find_vma()
traverses the VMA tree of an external mm_struct (from a foreign task) without
checking check_stable_address_space(mm).
When inspecting a foreign task, the mm_struct could be flagged with
MMF_UNSTABLE if it was subject to an OOM reap or a dup_mmap() failure. In this
state, the maple tree can contain XA_ZERO_ENTRY markers.
If find_vma() returns one of these markers, could the subsequent check using
vma->vm_start dereference an internal integer value and cause a kernel panic?
>
> @@ -782,6 +816,9 @@ BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
> ret = 0;
> }
> bpf_mmap_unlock_mm(work, mm);
> +out:
> + if (mmput_needed)
> + bpf_iter_mmput_async(mm);
> return ret;
> }
---
AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
CI run summary: https://github.com/kernel-patches/bpf/actions/runs/27250036662
This looks like a separate, pre-existing issue. This patch only fixes the mm_struct lifetime/UAF bug by pinning the foreign task's mm before using it. I can look into check_stable_address_space(mm) as a separate follow-up if maintainers think it is needed here.

Thanks,
Sanghyun
© 2016 - 2026 Red Hat, Inc.