[PATCH v4] perf/core: Fix refcount bug and potential UAF in perf_mmap

Posted by yuhaocheng035@gmail.com 5 days, 18 hours ago
From: Haocheng Yu <yuhaocheng035@gmail.com>

Syzkaller reported a "refcount_t: addition on 0; use-after-free"
warning in perf_mmap().

The issue is caused by a race condition between a failing mmap() setup
and a concurrent mmap() on a dependent event (e.g., using output
redirection).

In perf_mmap(), the ring_buffer (rb) is allocated and assigned to
event->rb with the mmap_mutex held. The mutex is then released to
perform map_range().

If map_range() fails, perf_mmap_close() is called to clean up.
However, since the mutex was dropped, another thread attaching to
this event (via inherited events or output redirection) can acquire
the mutex, observe the valid event->rb pointer, and attempt to
increment its reference count. If the cleanup path has already
dropped the reference count to zero, this results in a
use-after-free or refcount saturation warning.
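
For illustration, the access pattern that exposes the race looks
roughly like the sketch below. Everything in it (event configuration,
buffer size) is an arbitrary choice for the example, and it is NOT a
reliable reproducer: actually hitting the window also requires
map_range() to fail inside the parent's mmap(), which plain user
space cannot force.

#include <linux/perf_event.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>

#define RB_SIZE (2 * 4096)	/* header page + one data page */

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	void *p;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.size = sizeof(attr);
	attr.config = PERF_COUNT_SW_DUMMY;

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0)
		return 1;

	if (fork() == 0) {
		/*
		 * Child: same event fd, different mm, so this mmap() is
		 * not serialized against the parent's by mmap_lock. Under
		 * the bug it can observe event->rb installed by the
		 * parent's failing mmap() and bump a reference count that
		 * the parent's cleanup has already dropped to zero.
		 */
		p = mmap(NULL, RB_SIZE, PROT_READ | PROT_WRITE,
			 MAP_SHARED, fd, 0);
		if (p != MAP_FAILED)
			munmap(p, RB_SIZE);
		_exit(0);
	}

	/*
	 * Parent: before the fix, this mmap() dropped event->mmap_mutex
	 * to run map_range(); on failure, perf_mmap_close() then tore
	 * the buffer down in the window the child's mmap() races into.
	 */
	p = mmap(NULL, RB_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p != MAP_FAILED)
		munmap(p, RB_SIZE);

	wait(NULL);
	close(fd);
	return 0;
}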

Fix this by extending the scope of mmap_mutex to cover the
map_range() call. This ensures that ring buffer initialization
and mapping (or the cleanup on failure) happen effectively
atomically, preventing other threads from observing a
half-initialized or dying ring buffer.
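
Condensed to its control flow, the reworked tail of perf_mmap()
then has the following shape (simplified from the diff below; the
event_unmapped callback handling is omitted for brevity):

	ret = map_range(event->rb, vma);
	if (likely(!ret))
		return 0;

	/* Error path, still under event->mmap_mutex. */
	if (refcount_read(&event->mmap_count) == 1) {
		/* First mmap() failed: minimal private teardown. */
		perf_mmap_unaccount(vma, event->rb);
		ring_buffer_attach(event, NULL);  /* drops last rb->refcount */
		refcount_set(&event->mmap_count, 0);
		return ret;
	}
	/* Not the only mapping: full teardown. */
	perf_mmap_close(vma);
	return ret;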

v2:
Expanding the guarded region would have caused event->mmap_mutex to
be acquired recursively inside perf_mmap_close(), potentially leading
to a self-deadlock. The original perf_mmap_close() logic was therefore
retained, and the part that runs under the mutex was split out into a
perf_mmap_close_locked() function.

v3:
The fix is made smaller by passing a "holds_event_mmap_mutex"
parameter to perf_mmap_close().

v4:
Rework the error path following the approach suggested by Peter
Zijlstra: when the first mmap() fails in map_range(), perform a
minimal teardown directly under event->mmap_mutex (unaccount the
memory, detach event->rb, reset mmap_count) instead of calling
perf_mmap_close().

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@intel.com/
Reviewed-by: Ian Rogers <irogers@google.com>
Reviewed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Haocheng Yu <yuhaocheng035@gmail.com>
---
 kernel/events/core.c        | 59 +++++++++++++++++++++++++++++--------
 kernel/events/internal.h    |  1 +
 kernel/events/ring_buffer.c |  2 ++
 3 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 22a0f405585b..d3f978402b1e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7010,7 +7010,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
-
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
  * event, or through other events by use of perf_event_set_output().
@@ -7025,8 +7025,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	mapped_f unmapped = get_mapped(event, event_unmapped);
 	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
-	int mmap_locked = rb->mmap_locked;
-	unsigned long size = perf_data_size(rb);
 	bool detach_rest = false;
 
 	/* FIXIES vs perf_pmu_unregister() */
@@ -7121,11 +7119,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 * Aside from that, this buffer is 'fully' detached and unmapped,
 	 * undo the VM accounting.
 	 */
-
-	atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
-			&mmap_user->locked_vm);
-	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
-	free_uid(mmap_user);
+	perf_mmap_unaccount(vma, rb);
 
 out_put:
 	ring_buffer_put(rb); /* could be last */
@@ -7265,6 +7259,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
 	atomic64_add(extra, &vma->vm_mm->pinned_vm);
 }
 
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
+{
+	struct user_struct *user = rb->mmap_user;
+
+	atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
+			&user->locked_vm);
+	atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
+}
+
 static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 			unsigned long nr_pages)
 {
@@ -7327,8 +7330,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 	if (!rb)
 		return -ENOMEM;
 
-	refcount_set(&rb->mmap_count, 1);
-	rb->mmap_user = get_current_user();
 	rb->mmap_locked = extra;
 
 	ring_buffer_attach(event, rb);
@@ -7484,10 +7485,42 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		 * vmops::close().
 		 */
 		ret = map_range(event->rb, vma);
-		if (ret)
-			perf_mmap_close(vma);
-	}
+		if (likely(!ret))
+			return 0;
+
+		/* Error path */
 
+		/*
+		 * If this is the first mmap(), then event->mmap_count should
+		 * be stable at 1. It is only modified by:
+		 * perf_mmap_{open,close}() and perf_mmap().
+		 *
+		 * The former are not possible because this mmap() hasn't been
+		 * successful yet, and the latter is serialized by
+		 * event->mmap_mutex which we still hold (note that mmap_lock
+		 * is not strictly sufficient here, because the event fd can
+		 * be passed to another process through trivial means like
+		 * fork(), leading to concurrent mmap() from different mm).
+		 *
+		 * Make sure to remove event->rb before releasing
+		 * event->mmap_mutex, such that any concurrent mmap() will not
+		 * attempt to use this failed buffer.
+		 */
+		if (refcount_read(&event->mmap_count) == 1) {
+			/*
+			 * Minimal perf_mmap_close(); there can't be AUX or
+			 * other events on account of this being the first.
+			 */
+			mapped = get_mapped(event, event_unmapped);
+			if (mapped)
+				mapped(event, vma->vm_mm);
+			perf_mmap_unaccount(vma, event->rb);
+			ring_buffer_attach(event, NULL);	/* drops last rb->refcount */
+			refcount_set(&event->mmap_count, 0);
+			return ret;
+		}
+	}
+	perf_mmap_close(vma);
 	return ret;
 }
 
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index d9cc57083091..c03c4f2eea57 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	struct perf_buffer *rb;
 
 	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
+	free_uid(rb->mmap_user);
 	rb_free(rb);
 }
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 3e7de2661417..9fe92161715e 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
 		rb->paused = 1;
 
 	mutex_init(&rb->aux_mutex);
+	rb->mmap_user = get_current_user();
+	refcount_set(&rb->mmap_count, 1);
 }
 
 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)

base-commit: 77de62ad3de3967818c3dbe656b7336ebee461d2
-- 
2.51.0
Re: [PATCH v4] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Peter Zijlstra 5 days, 17 hours ago
On Fri, Mar 27, 2026 at 08:29:52PM +0800, yuhaocheng035@gmail.com wrote:
> From: Haocheng Yu <yuhaocheng035@gmail.com>
> 
> [...]
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@intel.com/
> Reviewed-by: Ian Rogers <irogers@google.com>
> Reviewed-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Haocheng Yu <yuhaocheng035@gmail.com>

You can't claim this as your patch. I was the one who wrote it --
yesterday.
Re: [PATCH v4] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Haocheng Yu 5 days, 17 hours ago
Your solution looks much better.

I tried to incorporate it and submitted it as this v4. If you are
already handling it, please ignore my patch.

Thanks,
Haocheng

> [...]