[PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by yuhaocheng035@gmail.com 1 week, 1 day ago
From: Haocheng Yu <yuhaocheng035@gmail.com>

Syzkaller reported a "refcount_t: addition on 0; use-after-free"
warning in perf_mmap().

The issue is caused by a race condition between a failing mmap() setup
and a concurrent mmap() on a dependent event (e.g., using output
redirection).

In perf_mmap(), the ring_buffer (rb) is allocated and assigned to
event->rb with the mmap_mutex held. The mutex is then released to
perform map_range().

If map_range() fails, perf_mmap_close() is called to clean up.
However, since the mutex was dropped, another thread attaching to
this event (via inherited events or output redirection) can acquire
the mutex, observe the valid event->rb pointer, and attempt to
increment its reference count. If the cleanup path has already
dropped the reference count to zero, this results in a
use-after-free or refcount saturation warning.
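
Roughly, the racing interleaving looks like this (simplified sketch;
the calls shown are from the perf_mmap() / perf_mmap_close() paths,
with unrelated details elided):

  CPU A: perf_mmap(), failing           CPU B: mmap() on dependent event
  ---------------------------           --------------------------------
  mutex_lock(&event->mmap_mutex)
  event->rb = rb                        /* rb->mmap_count == 1 */
  mutex_unlock(&event->mmap_mutex)
  map_range() returns an error
  perf_mmap_close(vma):
    refcount_dec_and_test(&rb->mmap_count)  /* 1 -> 0, rb is dying */
                                        mutex_lock(&event->mmap_mutex)
                                        rb = event->rb  /* still attached */
                                        refcount_inc(&rb->mmap_count)
                                        /* addition on 0: warning / UAF */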

Fix this by extending the scope of mmap_mutex to cover the
map_range() call. This ensures that ring buffer initialization and
mapping (or cleanup on failure) happen effectively atomically,
preventing other threads from accessing a half-initialized or
dying ring buffer.
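
In sketch form (assuming, as the diff below suggests, that perf_mmap()
wraps the ring-buffer setup in a section guarded by event->mmap_mutex):

  /* before: map_range() ran after the guarded section */
  guarded by event->mmap_mutex:
          install event->rb;
  /* mutex dropped: a concurrent mmap() can now see event->rb */
  ret = map_range(event->rb, vma);
  if (ret)
          perf_mmap_close(vma);   /* tears rb down while it is visible */

  /* after: map_range() stays inside the guarded section */
  guarded by event->mmap_mutex:
          install event->rb;
          ret = map_range(event->rb, vma);
          if (ret)
                  perf_mmap_close_locked(vma, event); /* mutex still held */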

v2:
Because expanding the guarded region would cause event->mmap_mutex to
be acquired a second time inside perf_mmap_close(), potentially
leading to a self-deadlock, the original perf_mmap_close() logic is
retained and a perf_mmap_close_locked() variant is added for callers
that already hold the mutex.
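
Concretely, the self-deadlock that v2 avoids would look like this
(sketch):

  mutex_lock(&event->mmap_mutex);               /* in perf_mmap() */
  ret = map_range(event->rb, vma);
  if (ret)
          perf_mmap_close(vma);
          /* ... which internally does: */
          refcount_dec_and_mutex_lock(&event->mmap_count,
                                      &event->mmap_mutex); /* blocks on itself */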

v3:
The fix is made smaller: perf_mmap_close() and a new
perf_mmap_close_locked() become thin wrappers that pass a
"holds_event_mmap_lock" flag to a common __perf_mmap_close() helper.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@intel.com/
Suggested-by: Ian Rogers <irogers@google.com>
Signed-off-by: Haocheng Yu <yuhaocheng035@gmail.com>
---
 kernel/events/core.c | 78 +++++++++++++++++++++++++++-----------------
 1 file changed, 48 insertions(+), 30 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2c35acc2722b..a3228c587de1 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6730,9 +6730,10 @@ static void perf_pmu_output_stop(struct perf_event *event);
  * the buffer here, where we still have a VM context. This means we need
  * to detach all events redirecting to us.
  */
-static void perf_mmap_close(struct vm_area_struct *vma)
+static void __perf_mmap_close(struct vm_area_struct *vma, struct perf_event *event,
+			      bool holds_event_mmap_lock)
 {
-	struct perf_event *event = vma->vm_file->private_data;
+	struct perf_event *iter_event;
 	mapped_f unmapped = get_mapped(event, event_unmapped);
 	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
@@ -6772,11 +6773,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	if (refcount_dec_and_test(&rb->mmap_count))
 		detach_rest = true;
 
-	if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
+	if ((!holds_event_mmap_lock &&
+	     !refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) ||
+	    (holds_event_mmap_lock && !refcount_dec_and_test(&event->mmap_count)))
 		goto out_put;
 
 	ring_buffer_attach(event, NULL);
-	mutex_unlock(&event->mmap_mutex);
+	if (!holds_event_mmap_lock)
+		mutex_unlock(&event->mmap_mutex);
 
 	/* If there's still other mmap()s of this buffer, we're done. */
 	if (!detach_rest)
@@ -6789,8 +6793,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 */
 again:
 	rcu_read_lock();
-	list_for_each_entry_rcu(event, &rb->event_list, rb_entry) {
-		if (!atomic_long_inc_not_zero(&event->refcount)) {
+	list_for_each_entry_rcu(iter_event, &rb->event_list, rb_entry) {
+		if (!atomic_long_inc_not_zero(&iter_event->refcount)) {
 			/*
 			 * This event is en-route to free_event() which will
 			 * detach it and remove it from the list.
@@ -6799,7 +6803,8 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		}
 		rcu_read_unlock();
 
-		mutex_lock(&event->mmap_mutex);
+		if (!holds_event_mmap_lock)
+			mutex_lock(&iter_event->mmap_mutex);
 		/*
 		 * Check we didn't race with perf_event_set_output() which can
 		 * swizzle the rb from under us while we were waiting to
@@ -6810,11 +6815,12 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 		 * still restart the iteration to make sure we're not now
 		 * iterating the wrong list.
 		 */
-		if (event->rb == rb)
-			ring_buffer_attach(event, NULL);
+		if (iter_event->rb == rb)
+			ring_buffer_attach(iter_event, NULL);
 
-		mutex_unlock(&event->mmap_mutex);
-		put_event(event);
+		if (!holds_event_mmap_lock)
+			mutex_unlock(&iter_event->mmap_mutex);
+		put_event(iter_event);
 
 		/*
 		 * Restart the iteration; either we're on the wrong list or
@@ -6842,6 +6848,18 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	ring_buffer_put(rb); /* could be last */
 }
 
+static void perf_mmap_close(struct vm_area_struct *vma)
+{
+	struct perf_event *event = vma->vm_file->private_data;
+
+	__perf_mmap_close(vma, event, false);
+}
+
+static void perf_mmap_close_locked(struct vm_area_struct *vma, struct perf_event *event)
+{
+	__perf_mmap_close(vma, event, true);
+}
+
 static vm_fault_t perf_mmap_pfn_mkwrite(struct vm_fault *vmf)
 {
 	/* The first page is the user control page, others are read-only. */
@@ -7167,28 +7185,28 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 			ret = perf_mmap_aux(vma, event, nr_pages);
 		if (ret)
 			return ret;
-	}
 
-	/*
-	 * Since pinned accounting is per vm we cannot allow fork() to copy our
-	 * vma.
-	 */
-	vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
-	vma->vm_ops = &perf_mmap_vmops;
+		/*
+		 * Since pinned accounting is per vm we cannot allow fork() to copy our
+		 * vma.
+		 */
+		vm_flags_set(vma, VM_DONTCOPY | VM_DONTEXPAND | VM_DONTDUMP);
+		vma->vm_ops = &perf_mmap_vmops;
 
-	mapped = get_mapped(event, event_mapped);
-	if (mapped)
-		mapped(event, vma->vm_mm);
+		mapped = get_mapped(event, event_mapped);
+		if (mapped)
+			mapped(event, vma->vm_mm);
 
-	/*
-	 * Try to map it into the page table. On fail, invoke
-	 * perf_mmap_close() to undo the above, as the callsite expects
-	 * full cleanup in this case and therefore does not invoke
-	 * vmops::close().
-	 */
-	ret = map_range(event->rb, vma);
-	if (ret)
-		perf_mmap_close(vma);
+		/*
+		 * Try to map it into the page table. On fail, invoke
+		 * perf_mmap_close() to undo the above, as the callsite expects
+		 * full cleanup in this case and therefore does not invoke
+		 * vmops::close().
+		 */
+		ret = map_range(event->rb, vma);
+		if (ret)
+			perf_mmap_close_locked(vma, event);
+	}
 
 	return ret;
 }

base-commit: 7d0a66e4bb9081d75c82ec4957c50034cb0ea449
-- 
2.51.0
Re: [PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Peter Zijlstra 1 week, 1 day ago
Argh,. why is this hidden in this old thread :/

On Wed, Mar 25, 2026 at 06:20:53PM +0800, yuhaocheng035@gmail.com wrote:

> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 2c35acc2722b..a3228c587de1 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -6730,9 +6730,10 @@ static void perf_pmu_output_stop(struct perf_event *event);
>   * the buffer here, where we still have a VM context. This means we need
>   * to detach all events redirecting to us.
>   */
> -static void perf_mmap_close(struct vm_area_struct *vma)
> +static void __perf_mmap_close(struct vm_area_struct *vma, struct perf_event *event,
> +			      bool holds_event_mmap_lock)
>  {
> -	struct perf_event *event = vma->vm_file->private_data;
> +	struct perf_event *iter_event;
>  	mapped_f unmapped = get_mapped(event, event_unmapped);
>  	struct perf_buffer *rb = ring_buffer_get(event);
>  	struct user_struct *mmap_user = rb->mmap_user;
> @@ -6772,11 +6773,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
>  	if (refcount_dec_and_test(&rb->mmap_count))
>  		detach_rest = true;
>  
> -	if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
> +	if ((!holds_event_mmap_lock &&
> +	     !refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) ||
> +	    (holds_event_mmap_lock && !refcount_dec_and_test(&event->mmap_count)))
>  		goto out_put;

*groan*, this is horrible.

Let me have a poke to see if there isn't a saner variant around.
Re: [PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Qing Wang 1 week ago
On Wed, 25 Mar 2026 at 23:17, Peter Zijlstra <peterz@infradead.org> wrote:
> Argh,. why is this hidden in this old thread :/
> 
> On Wed, Mar 25, 2026 at 06:20:53PM +0800, yuhaocheng035@gmail.com wrote:
> 
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index 2c35acc2722b..a3228c587de1 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -6730,9 +6730,10 @@ static void perf_pmu_output_stop(struct perf_event *event);
> >   * the buffer here, where we still have a VM context. This means we need
> >   * to detach all events redirecting to us.
> >   */
> > -static void perf_mmap_close(struct vm_area_struct *vma)
> > +static void __perf_mmap_close(struct vm_area_struct *vma, struct perf_event *event,
> > +			      bool holds_event_mmap_lock)
> >  {
> > -	struct perf_event *event = vma->vm_file->private_data;
> > +	struct perf_event *iter_event;
> >  	mapped_f unmapped = get_mapped(event, event_unmapped);
> >  	struct perf_buffer *rb = ring_buffer_get(event);
> >  	struct user_struct *mmap_user = rb->mmap_user;
> > @@ -6772,11 +6773,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
> >  	if (refcount_dec_and_test(&rb->mmap_count))
> >  		detach_rest = true;
> >  
> > -	if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
> > +	if ((!holds_event_mmap_lock &&
> > +	     !refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) ||
> > +	    (holds_event_mmap_lock && !refcount_dec_and_test(&event->mmap_count)))
> >  		goto out_put;
> 
> *groan*, this is horrible.
> 
> Let me have a poke to see if there isn't a saner variant around.

I think it's ok to move perf_mmap_close() outside the mutex lock, like this:

https://lore.kernel.org/all/20260325153240.GK3739106@noisy.programming.kicks-ass.net/T/#m0f82e8ecdfdfce4acd5121bcb799e864cf05ebf9

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1f5699b339ec..e5ce03ce926d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7485,9 +7485,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		 */
 		ret = map_range(event->rb, vma);
 		if (ret)
-			perf_mmap_close(vma);
+			goto out_close;
 	}
+	return 0;
 
+out_close:
+	perf_mmap_close(vma);
 	return ret;
 }

What do you think?

--
Qing
Re: [PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Peter Zijlstra 1 week ago
On Thu, Mar 26, 2026 at 11:18:06AM +0800, Qing Wang wrote:
> On Wed, 25 Mar 2026 at 23:17, Peter Zijlstra <peterz@infradead.org> wrote:
> > Argh,. why is this hidden in this old thread :/
> > 
> > On Wed, Mar 25, 2026 at 06:20:53PM +0800, yuhaocheng035@gmail.com wrote:
> > 
> > > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > > index 2c35acc2722b..a3228c587de1 100644
> > > --- a/kernel/events/core.c
> > > +++ b/kernel/events/core.c
> > > @@ -6730,9 +6730,10 @@ static void perf_pmu_output_stop(struct perf_event *event);
> > >   * the buffer here, where we still have a VM context. This means we need
> > >   * to detach all events redirecting to us.
> > >   */
> > > -static void perf_mmap_close(struct vm_area_struct *vma)
> > > +static void __perf_mmap_close(struct vm_area_struct *vma, struct perf_event *event,
> > > +			      bool holds_event_mmap_lock)
> > >  {
> > > -	struct perf_event *event = vma->vm_file->private_data;
> > > +	struct perf_event *iter_event;
> > >  	mapped_f unmapped = get_mapped(event, event_unmapped);
> > >  	struct perf_buffer *rb = ring_buffer_get(event);
> > >  	struct user_struct *mmap_user = rb->mmap_user;
> > > @@ -6772,11 +6773,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
> > >  	if (refcount_dec_and_test(&rb->mmap_count))
> > >  		detach_rest = true;
> > >  
> > > -	if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
> > > +	if ((!holds_event_mmap_lock &&
> > > +	     !refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) ||
> > > +	    (holds_event_mmap_lock && !refcount_dec_and_test(&event->mmap_count)))
> > >  		goto out_put;
> > 
> > *groan*, this is horrible.
> > 
> > Let me have a poke to see if there isn't a saner variant around.
> 
> I think it's ok to move perf_mmap_close() outside the mutex lock, like this:
> 
> https://lore.kernel.org/all/20260325153240.GK3739106@noisy.programming.kicks-ass.net/T/#m0f82e8ecdfdfce4acd5121bcb799e864cf05ebf9
> 
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 1f5699b339ec..e5ce03ce926d 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -7485,9 +7485,12 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
>  		 */
>  		ret = map_range(event->rb, vma);
>  		if (ret)
> -			perf_mmap_close(vma);
> +			goto out_close;
>  	}
> +	return 0;
>  
> +out_close:
> +	perf_mmap_close(vma);
>  	return ret;
>  }
> 
> How do you think?

Well, that will just re-introduce the original problem. As you were told
there.

What about something like this?

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 1f5699b339ec..0bb1d8b83bc9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7010,6 +7010,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);
 
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
@@ -7025,8 +7026,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	mapped_f unmapped = get_mapped(event, event_unmapped);
 	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
-	int mmap_locked = rb->mmap_locked;
-	unsigned long size = perf_data_size(rb);
 	bool detach_rest = false;
 
 	/* FIXIES vs perf_pmu_unregister() */
@@ -7121,11 +7120,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 * Aside from that, this buffer is 'fully' detached and unmapped,
 	 * undo the VM accounting.
 	 */
-
-	atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
-			&mmap_user->locked_vm);
-	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
-	free_uid(mmap_user);
+	perf_mmap_unaccount(vma, rb);
 
 out_put:
 	ring_buffer_put(rb); /* could be last */
@@ -7265,6 +7260,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
 	atomic64_add(extra, &vma->vm_mm->pinned_vm);
 }
 
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
+{
+	struct user_struct *user = rb->mmap_user;
+
+	atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
+			&user->locked_vm);
+	atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
+}
+
 static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 			unsigned long nr_pages)
 {
@@ -7327,8 +7331,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 	if (!rb)
 		return -ENOMEM;
 
-	refcount_set(&rb->mmap_count, 1);
-	rb->mmap_user = get_current_user();
 	rb->mmap_locked = extra;
 
 	ring_buffer_attach(event, rb);
@@ -7484,10 +7486,43 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		 * vmops::close().
 		 */
 		ret = map_range(event->rb, vma);
-		if (ret)
-			perf_mmap_close(vma);
+		if (likely(!ret))
+			return 0;
+
+		/* Error path */
+
+		/*
+		 * If this is the first mmap(), then event->mmap_count should
+		 * be stable at 1. It is only modified by:
+		 * perf_mmap_{open,close}() and perf_mmap().
+		 *
+		 * The former are not possible because this mmap() hasn't been
+		 * successful yet, and the latter is serialized by
+		 * event->mmap_mutex which we still hold (note that mmap_lock
+		 * is not strictly sufficient here, because the event fd can
+		 * be passed to another process through trivial means like
+		 * fork(), leading to concurrent mmap() from different mm).
+		 *
+		 * Make sure to remove event->rb before releasing
+		 * event->mmap_mutex, such that any concurrent mmap() will not
+		 * attempt use this failed buffer.
+		 */
+		if (refcount_read(&event->mmap_count) == 1) {
+			/*
+			 * Minimal perf_mmap_close(); there can't be AUX or
+			 * other events on account of this being the first.
+			 */
+			mapped = get_mapped(event, event_unmapped);
+			if (mapped)
+				mapped(event, vma->vm_mm);
+			perf_mmap_unaccount(vma, event->rb);
+			ring_buffer_attach(event, NULL);	/* drops last rb->refcount */
+			refcount_set(&event->mmap_count, 0);
+			return ret;
+		}
 	}
 
+	perf_mmap_close(vma);
 	return ret;
 }
 
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index d9cc57083091..c03c4f2eea57 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	struct perf_buffer *rb;
 
 	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
+	free_uid(rb->mmap_user);
 	rb_free(rb);
 }
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 3e7de2661417..9fe92161715e 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
 		rb->paused = 1;
 
 	mutex_init(&rb->aux_mutex);
+	rb->mmap_user = get_current_user();
+	refcount_set(&rb->mmap_count, 1);
 }
 
 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)
[PATCH v4] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by yuhaocheng035@gmail.com 6 days, 11 hours ago
From: Haocheng Yu <yuhaocheng035@gmail.com>

Syzkaller reported a "refcount_t: addition on 0; use-after-free"
warning in perf_mmap().

The issue is caused by a race condition between a failing mmap() setup
and a concurrent mmap() on a dependent event (e.g., using output
redirection).

In perf_mmap(), the ring_buffer (rb) is allocated and assigned to
event->rb with the mmap_mutex held. The mutex is then released to
perform map_range().

If map_range() fails, perf_mmap_close() is called to clean up.
However, since the mutex was dropped, another thread attaching to
this event (via inherited events or output redirection) can acquire
the mutex, observe the valid event->rb pointer, and attempt to
increment its reference count. If the cleanup path has already
dropped the reference count to zero, this results in a
use-after-free or refcount saturation warning.

Fix this by extending the scope of mmap_mutex to cover the
map_range() call. This ensures that ring buffer initialization and
mapping (or cleanup on failure) happen effectively atomically,
preventing other threads from accessing a half-initialized or
dying ring buffer.

v2:
Because expanding the guarded region would cause event->mmap_mutex to
be acquired a second time inside perf_mmap_close(), potentially
leading to a self-deadlock, the original perf_mmap_close() logic is
retained and a perf_mmap_close_locked() variant is added for callers
that already hold the mutex.

v3:
The fix is made smaller: perf_mmap_close() and a new
perf_mmap_close_locked() become thin wrappers that pass a
"holds_event_mmap_lock" flag to a common __perf_mmap_close() helper.

v4:
Rework the error path as suggested by Peter Zijlstra: on map_range()
failure for the first mmap(), do a minimal inline cleanup in
perf_mmap() instead of calling perf_mmap_close(), and move the
rb->mmap_user / rb->mmap_count initialization into ring_buffer_init().

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@intel.com/
Reviewed-by: Ian Rogers <irogers@google.com>
Reviewed-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Haocheng Yu <yuhaocheng035@gmail.com>
---
 kernel/events/core.c        | 59 +++++++++++++++++++++++++++++--------
 kernel/events/internal.h    |  1 +
 kernel/events/ring_buffer.c |  2 ++
 3 files changed, 49 insertions(+), 13 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 22a0f405585b..d3f978402b1e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7010,7 +7010,7 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 }
 
 static void perf_pmu_output_stop(struct perf_event *event);
-
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb);
 /*
  * A buffer can be mmap()ed multiple times; either directly through the same
  * event, or through other events by use of perf_event_set_output().
@@ -7025,8 +7025,6 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	mapped_f unmapped = get_mapped(event, event_unmapped);
 	struct perf_buffer *rb = ring_buffer_get(event);
 	struct user_struct *mmap_user = rb->mmap_user;
-	int mmap_locked = rb->mmap_locked;
-	unsigned long size = perf_data_size(rb);
 	bool detach_rest = false;
 
 	/* FIXIES vs perf_pmu_unregister() */
@@ -7121,11 +7119,7 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	 * Aside from that, this buffer is 'fully' detached and unmapped,
 	 * undo the VM accounting.
 	 */
-
-	atomic_long_sub((size >> PAGE_SHIFT) + 1 - mmap_locked,
-			&mmap_user->locked_vm);
-	atomic64_sub(mmap_locked, &vma->vm_mm->pinned_vm);
-	free_uid(mmap_user);
+	perf_mmap_unaccount(vma, rb);
 
 out_put:
 	ring_buffer_put(rb); /* could be last */
@@ -7265,6 +7259,15 @@ static void perf_mmap_account(struct vm_area_struct *vma, long user_extra, long
 	atomic64_add(extra, &vma->vm_mm->pinned_vm);
 }
 
+static void perf_mmap_unaccount(struct vm_area_struct *vma, struct perf_buffer *rb)
+{
+	struct user_struct *user = rb->mmap_user;
+
+	atomic_long_sub((perf_data_size(rb) >> PAGE_SHIFT) + 1 - rb->mmap_locked,
+			&user->locked_vm);
+	atomic64_sub(rb->mmap_locked, &vma->vm_mm->pinned_vm);
+}
+
 static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 			unsigned long nr_pages)
 {
@@ -7327,8 +7330,6 @@ static int perf_mmap_rb(struct vm_area_struct *vma, struct perf_event *event,
 	if (!rb)
 		return -ENOMEM;
 
-	refcount_set(&rb->mmap_count, 1);
-	rb->mmap_user = get_current_user();
 	rb->mmap_locked = extra;
 
 	ring_buffer_attach(event, rb);
@@ -7484,10 +7485,42 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		 * vmops::close().
 		 */
 		ret = map_range(event->rb, vma);
-		if (ret)
-			perf_mmap_close(vma);
-	}
+		if (likely(!ret))
+			return 0;
+
+		/* Error path */
 
+		/*
+		 * If this is the first mmap(), then event->mmap_count should
+		 * be stable at 1. It is only modified by:
+		 * perf_mmap_{open,close}() and perf_mmap().
+		 *
+		 * The former are not possible because this mmap() hasn't been
+		 * successful yet, and the latter is serialized by
+		 * event->mmap_mutex which we still hold (note that mmap_lock
+		 * is not strictly sufficient here, because the event fd can
+		 * be passed to another process through trivial means like
+		 * fork(), leading to concurrent mmap() from different mm).
+		 *
+		 * Make sure to remove event->rb before releasing
+		 * event->mmap_mutex, such that any concurrent mmap() will not
+		 * attempt use this failed buffer.
+		 */
+		if (refcount_read(&event->mmap_count) == 1) {
+			/*
+			 * Minimal perf_mmap_close(); there can't be AUX or
+			 * other events on account of this being the first.
+			 */
+			mapped = get_mapped(event, event_unmapped);
+			if (mapped)
+				mapped(event, vma->vm_mm);
+			perf_mmap_unaccount(vma, event->rb);
+			ring_buffer_attach(event, NULL);	/* drops last rb->refcount */
+			refcount_set(&event->mmap_count, 0);
+			return ret;
+		}
+	}
+	perf_mmap_close(vma);
 	return ret;
 }
 
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index d9cc57083091..c03c4f2eea57 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -67,6 +67,7 @@ static inline void rb_free_rcu(struct rcu_head *rcu_head)
 	struct perf_buffer *rb;
 
 	rb = container_of(rcu_head, struct perf_buffer, rcu_head);
+	free_uid(rb->mmap_user);
 	rb_free(rb);
 }
 
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 3e7de2661417..9fe92161715e 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -340,6 +340,8 @@ ring_buffer_init(struct perf_buffer *rb, long watermark, int flags)
 		rb->paused = 1;
 
 	mutex_init(&rb->aux_mutex);
+	rb->mmap_user = get_current_user();
+	refcount_set(&rb->mmap_count, 1);
 }
 
 void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags)

base-commit: 77de62ad3de3967818c3dbe656b7336ebee461d2
-- 
2.51.0
Re: [PATCH v4] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Peter Zijlstra 6 days, 11 hours ago
On Fri, Mar 27, 2026 at 08:29:52PM +0800, yuhaocheng035@gmail.com wrote:
> From: Haocheng Yu <yuhaocheng035@gmail.com>
> 
> Syzkaller reported a refcount_t: addition on 0; use-after-free warning
> in perf_mmap.
> 
> The issue is caused by a race condition between a failing mmap() setup
> and a concurrent mmap() on a dependent event (e.g., using output
> redirection).
> 
> In perf_mmap(), the ring_buffer (rb) is allocated and assigned to
> event->rb with the mmap_mutex held. The mutex is then released to
> perform map_range().
> 
> If map_range() fails, perf_mmap_close() is called to clean up.
> However, since the mutex was dropped, another thread attaching to
> this event (via inherited events or output redirection) can acquire
> the mutex, observe the valid event->rb pointer, and attempt to
> increment its reference count. If the cleanup path has already
> dropped the reference count to zero, this results in a
> use-after-free or refcount saturation warning.
> 
> Fix this by extending the scope of mmap_mutex to cover the
> map_range() call. This ensures that the ring buffer initialization
> and mapping (or cleanup on failure) happens atomically effectively,
> preventing other threads from accessing a half-initialized or
> dying ring buffer.
> 
> v2:
> Because expanding the guarded region would cause the event->mmap_mutex
> to be acquired repeatedly in the perf_mmap_close function, potentially
> leading to a self deadlock, the original logic of perf_mmap_close was
> retained, and the mutex-holding logic was modified to obtain the
> perf_mmap_close_locked function.
> 
> v3:
> The fix is made smaller by passing the parameter "holds_event_mmap_mutex"
> to perf_mmap_close.
> 
> v4:
> This problem is solved in a smarter way.
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@intel.com/
> Reviewed-by: Ian Rogers <irogers@google.com>
> Reviewed-by: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Haocheng Yu <yuhaocheng035@gmail.com>

You can't claim this as your patch. I was the one who wrote it --
yesterday.
Re: [PATCH v4] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Haocheng Yu 6 days, 11 hours ago
Your solution looks much better.

I tried to incorporate it and submitted it as patch v4. If you are
already handling it yourself, please ignore my patch.

Thanks,
Haocheng

Re: [PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Peter Zijlstra 1 week, 1 day ago
On Wed, Mar 25, 2026 at 04:17:35PM +0100, Peter Zijlstra wrote:
> 
> Argh,. why is this hidden in this old thread :/
> 
> On Wed, Mar 25, 2026 at 06:20:53PM +0800, yuhaocheng035@gmail.com wrote:
> 
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index 2c35acc2722b..a3228c587de1 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -6730,9 +6730,10 @@ static void perf_pmu_output_stop(struct perf_event *event);
> >   * the buffer here, where we still have a VM context. This means we need
> >   * to detach all events redirecting to us.
> >   */
> > -static void perf_mmap_close(struct vm_area_struct *vma)
> > +static void __perf_mmap_close(struct vm_area_struct *vma, struct perf_event *event,
> > +			      bool holds_event_mmap_lock)
> >  {
> > -	struct perf_event *event = vma->vm_file->private_data;
> > +	struct perf_event *iter_event;
> >  	mapped_f unmapped = get_mapped(event, event_unmapped);
> >  	struct perf_buffer *rb = ring_buffer_get(event);
> >  	struct user_struct *mmap_user = rb->mmap_user;
> > @@ -6772,11 +6773,14 @@ static void perf_mmap_close(struct vm_area_struct *vma)
> >  	if (refcount_dec_and_test(&rb->mmap_count))
> >  		detach_rest = true;
> >  
> > -	if (!refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
> > +	if ((!holds_event_mmap_lock &&
> > +	     !refcount_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) ||
> > +	    (holds_event_mmap_lock && !refcount_dec_and_test(&event->mmap_count)))
> >  		goto out_put;
> 
> *groan*, this is horrible.
> 
> Let me have a poke to see if there isn't a saner variant around.

Also, I just realized this patch doesn't even apply; it is against a
tree without 77de62ad3de3 ("perf/core: Fix refcount bug and potential
UAF in perf_mmap").
Re: [PATCH v3] perf/core: Fix refcount bug and potential UAF in perf_mmap
Posted by Ian Rogers 1 week, 1 day ago
On Wed, Mar 25, 2026 at 3:21 AM <yuhaocheng035@gmail.com> wrote:
>
> From: Haocheng Yu <yuhaocheng035@gmail.com>
>
> Syzkaller reported a refcount_t: addition on 0; use-after-free warning
> in perf_mmap.
>
> The issue is caused by a race condition between a failing mmap() setup
> and a concurrent mmap() on a dependent event (e.g., using output
> redirection).
>
> In perf_mmap(), the ring_buffer (rb) is allocated and assigned to
> event->rb with the mmap_mutex held. The mutex is then released to
> perform map_range().
>
> If map_range() fails, perf_mmap_close() is called to clean up.
> However, since the mutex was dropped, another thread attaching to
> this event (via inherited events or output redirection) can acquire
> the mutex, observe the valid event->rb pointer, and attempt to
> increment its reference count. If the cleanup path has already
> dropped the reference count to zero, this results in a
> use-after-free or refcount saturation warning.
>
> Fix this by extending the scope of mmap_mutex to cover the
> map_range() call. This ensures that the ring buffer initialization
> and mapping (or cleanup on failure) happens atomically effectively,
> preventing other threads from accessing a half-initialized or
> dying ring buffer.
>
> v2:
> Because expanding the guarded region would cause the event->mmap_mutex
> to be acquired repeatedly in the perf_mmap_close function, potentially
> leading to a self deadlock, the original logic of perf_mmap_close was
> retained, and the mutex-holding logic was modified to obtain the
> perf_mmap_close_locked function.
>
> v3:
> The fix is made smaller by passing the parameter "holds_event_mmap_mutex"
> to perf_mmap_close.
>
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202602020208.m7KIjdzW-lkp@intel.com/
> Suggested-by: Ian Rogers <irogers@google.com>
> Signed-off-by: Haocheng Yu <yuhaocheng035@gmail.com>

Reviewed-by: Ian Rogers <irogers@google.com>

Thanks,
Ian
