[PATCH v3 6/7] perf: Rename perf_event_exit_task(.child)

Peter Zijlstra posted 7 patches 9 months, 1 week ago
[PATCH v3 6/7] perf: Rename perf_event_exit_task(.child)
Posted by Peter Zijlstra 9 months, 1 week ago
The task passed to perf_event_exit_task() is not a child, it is
current. Fix this confusing naming, since much of the rest of the code
also relies on it being current.

Specifically, both exec() and exit() callers use it with current as
the argument.

Notably, task_ctx_sched_out() doesn't make much sense outside of
current.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/core.c |   60 ++++++++++++++++++++++++++-------------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -13488,15 +13488,15 @@ perf_event_exit_event(struct perf_event
 	perf_event_wakeup(event);
 }
 
-static void perf_event_exit_task_context(struct task_struct *child, bool exit)
+static void perf_event_exit_task_context(struct task_struct *task, bool exit)
 {
-	struct perf_event_context *child_ctx, *clone_ctx = NULL;
+	struct perf_event_context *ctx, *clone_ctx = NULL;
 	struct perf_event *child_event, *next;
 
-	WARN_ON_ONCE(child != current);
+	WARN_ON_ONCE(task != current);
 
-	child_ctx = perf_pin_task_context(child);
-	if (!child_ctx)
+	ctx = perf_pin_task_context(task);
+	if (!ctx)
 		return;
 
 	/*
@@ -13509,27 +13509,27 @@ static void perf_event_exit_task_context
 	 * without ctx::mutex (it cannot because of the move_group double mutex
 	 * lock thing). See the comments in perf_install_in_context().
 	 */
-	mutex_lock(&child_ctx->mutex);
+	mutex_lock(&ctx->mutex);
 
 	/*
 	 * In a single ctx::lock section, de-schedule the events and detach the
 	 * context from the task such that we cannot ever get it scheduled back
 	 * in.
 	 */
-	raw_spin_lock_irq(&child_ctx->lock);
-	task_ctx_sched_out(child_ctx, NULL, EVENT_ALL);
+	raw_spin_lock_irq(&ctx->lock);
+	task_ctx_sched_out(ctx, NULL, EVENT_ALL);
 
 	/*
 	 * Now that the context is inactive, destroy the task <-> ctx relation
 	 * and mark the context dead.
 	 */
-	RCU_INIT_POINTER(child->perf_event_ctxp, NULL);
-	put_ctx(child_ctx); /* cannot be last */
-	WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
+	RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
+	put_ctx(ctx); /* cannot be last */
+	WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
 	put_task_struct(current); /* cannot be last */
 
-	clone_ctx = unclone_ctx(child_ctx);
-	raw_spin_unlock_irq(&child_ctx->lock);
+	clone_ctx = unclone_ctx(ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 
 	if (clone_ctx)
 		put_ctx(clone_ctx);
@@ -13540,12 +13540,12 @@ static void perf_event_exit_task_context
 	 * get a few PERF_RECORD_READ events.
 	 */
 	if (exit)
-		perf_event_task(child, child_ctx, 0);
+		perf_event_task(task, ctx, 0);
 
-	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, child_ctx);
+	list_for_each_entry_safe(child_event, next, &ctx->event_list, event_entry)
+		perf_event_exit_event(child_event, ctx);
 
-	mutex_unlock(&child_ctx->mutex);
+	mutex_unlock(&ctx->mutex);
 
 	if (!exit) {
 		/*
@@ -13561,24 +13561,26 @@ static void perf_event_exit_task_context
 		 *
 		 * Wait for all events to drop their context reference.
 		 */
-		wait_var_event(&child_ctx->refcount,
-			       refcount_read(&child_ctx->refcount) == 1);
+		wait_var_event(&ctx->refcount,
+			       refcount_read(&ctx->refcount) == 1);
 	}
-	put_ctx(child_ctx);
+	put_ctx(ctx);
 }
 
 /*
- * When a child task exits, feed back event values to parent events.
+ * When a task exits, feed back event values to parent events.
  *
  * Can be called with exec_update_lock held when called from
  * setup_new_exec().
  */
-void perf_event_exit_task(struct task_struct *child)
+void perf_event_exit_task(struct task_struct *task)
 {
 	struct perf_event *event, *tmp;
 
-	mutex_lock(&child->perf_event_mutex);
-	list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+	WARN_ON_ONCE(task != current);
+
+	mutex_lock(&task->perf_event_mutex);
+	list_for_each_entry_safe(event, tmp, &task->perf_event_list,
 				 owner_entry) {
 		list_del_init(&event->owner_entry);
 
@@ -13589,17 +13591,17 @@ void perf_event_exit_task(struct task_st
 		 */
 		smp_store_release(&event->owner, NULL);
 	}
-	mutex_unlock(&child->perf_event_mutex);
+	mutex_unlock(&task->perf_event_mutex);
 
-	perf_event_exit_task_context(child, true);
+	perf_event_exit_task_context(task, true);
 
 	/*
 	 * The perf_event_exit_task_context calls perf_event_task
-	 * with child's task_ctx, which generates EXIT events for
-	 * child contexts and sets child->perf_event_ctxp[] to NULL.
+	 * with task's task_ctx, which generates EXIT events for
+	 * task contexts and sets task->perf_event_ctxp[] to NULL.
 	 * At this point we need to send EXIT events to cpu contexts.
 	 */
-	perf_event_task(child, NULL, 0);
+	perf_event_task(task, NULL, 0);
 }
 
 /*
Re: [PATCH v3 6/7] perf: Rename perf_event_exit_task(.child)
Posted by Ravi Bangoria 9 months, 1 week ago
Hi Peter,

On 08-Mar-25 1:03 AM, Peter Zijlstra wrote:
> The task passed to perf_event_exit_task() is not a child, it is
> current. Fix this confusing naming, since much of the rest of the code
> also relies on it being current.
> 
> Specifically, both exec() and exit() callers use it with current as
> the argument.

...

> -static void perf_event_exit_task_context(struct task_struct *child, bool exit)
> +static void perf_event_exit_task_context(struct task_struct *task, bool exit)
>  {
> -	struct perf_event_context *child_ctx, *clone_ctx = NULL;
> +	struct perf_event_context *ctx, *clone_ctx = NULL;
>  	struct perf_event *child_event, *next;
>  
> -	WARN_ON_ONCE(child != current);
> +	WARN_ON_ONCE(task != current);

exec() codepath (i.e. copy_process()) passes child pointer, not 'current'.

Thanks,
Ravi
Re: [PATCH v3 6/7] perf: Rename perf_event_exit_task(.child)
Posted by Peter Zijlstra 9 months, 1 week ago
On Mon, Mar 10, 2025 at 04:38:36PM +0530, Ravi Bangoria wrote:
> Hi Peter,
> 
> On 08-Mar-25 1:03 AM, Peter Zijlstra wrote:
> > The task passed to perf_event_exit_task() is not a child, it is
> > current. Fix this confusing naming, since much of the rest of the code
> > also relies on it being current.
> > 
> > Specifically, both exec() and exit() callers use it with current as
> > the argument.
> 
> ...
> 
> > -static void perf_event_exit_task_context(struct task_struct *child, bool exit)
> > +static void perf_event_exit_task_context(struct task_struct *task, bool exit)
> >  {
> > -	struct perf_event_context *child_ctx, *clone_ctx = NULL;
> > +	struct perf_event_context *ctx, *clone_ctx = NULL;
> >  	struct perf_event *child_event, *next;
> >  
> > -	WARN_ON_ONCE(child != current);
> > +	WARN_ON_ONCE(task != current);
> 
> exec() codepath (i.e. copy_process()) passes child pointer, not 'current'.

I am confused, this is not a new warning. Also, copy_process() is
clone(); exec() is a different code path.
Re: [PATCH v3 6/7] perf: Rename perf_event_exit_task(.child)
Posted by Ravi Bangoria 9 months, 1 week ago
On 10-Mar-25 8:17 PM, Peter Zijlstra wrote:
> On Mon, Mar 10, 2025 at 04:38:36PM +0530, Ravi Bangoria wrote:
>> Hi Peter,
>>
>> On 08-Mar-25 1:03 AM, Peter Zijlstra wrote:
>>> The task passed to perf_event_exit_task() is not a child, it is
>>> current. Fix this confusing naming, since much of the rest of the code
>>> also relies on it being current.
>>>
>>> Specifically, both exec() and exit() callers use it with current as
>>> the argument.
>>
>> ...
>>
>>> -static void perf_event_exit_task_context(struct task_struct *child, bool exit)
>>> +static void perf_event_exit_task_context(struct task_struct *task, bool exit)
>>>  {
>>> -	struct perf_event_context *child_ctx, *clone_ctx = NULL;
>>> +	struct perf_event_context *ctx, *clone_ctx = NULL;
>>>  	struct perf_event *child_event, *next;
>>>  
>>> -	WARN_ON_ONCE(child != current);
>>> +	WARN_ON_ONCE(task != current);
>>
>> exec() codepath (i.e. copy_process()) passes child pointer, not 'current'.
> 
> I am confused, this not a new warning.

Right. However, the WARN was present only in perf_event_exit_task_context()
before it was merged with perf_event_free_task() (patch #5), and
perf_event_free_task() is called for the child task.

> Also, copy_process() is clone(), exec() is another code path.

My bad. I meant clone() code path:

  copy_process()
    p = dup_task_struct(current);
    perf_event_init_task(p);
      perf_event_free_task(p);
        perf_event_exit_task_context(p);
          WARN_ON_ONCE(task != current);

Another one:

  copy_process()
    p = dup_task_struct(current);
    ...
    bad_fork_cleanup_perf:
      perf_event_free_task(p);
        perf_event_exit_task_context(p);
          WARN_ON_ONCE(task != current);

Or am I missing something?

Thanks,
Ravi
Re: [PATCH v3 6/7] perf: Rename perf_event_exit_task(.child)
Posted by Peter Zijlstra 9 months, 1 week ago
On Mon, Mar 10, 2025 at 08:50:55PM +0530, Ravi Bangoria wrote:
> On 10-Mar-25 8:17 PM, Peter Zijlstra wrote:
> > On Mon, Mar 10, 2025 at 04:38:36PM +0530, Ravi Bangoria wrote:
> >> Hi Peter,
> >>
> >> On 08-Mar-25 1:03 AM, Peter Zijlstra wrote:
> >>> The task passed to perf_event_exit_task() is not a child, it is
> >>> current. Fix this confusing naming, since much of the rest of the code
> >>> also relies on it being current.
> >>>
> >>> Specifically, both exec() and exit() callers use it with current as
> >>> the argument.
> >>
> >> ...
> >>
> >>> -static void perf_event_exit_task_context(struct task_struct *child, bool exit)
> >>> +static void perf_event_exit_task_context(struct task_struct *task, bool exit)
> >>>  {
> >>> -	struct perf_event_context *child_ctx, *clone_ctx = NULL;
> >>> +	struct perf_event_context *ctx, *clone_ctx = NULL;
> >>>  	struct perf_event *child_event, *next;
> >>>  
> >>> -	WARN_ON_ONCE(child != current);
> >>> +	WARN_ON_ONCE(task != current);
> >>
> >> exec() codepath (i.e. copy_process()) passes child pointer, not 'current'.
> > 
> > I am confused, this not a new warning.
> 
> Right, However the WARN was present only in perf_event_exit_task_context()
> before merging it with perf_event_free_task() (patch #5). And
> perf_event_free_task() is getting called for child task.

Argh, yes.

> > Also, copy_process() is clone(), exec() is another code path.
> 
> My bad. I meant clone() code path:
> 
>   copy_process()
>     p = dup_task_struct(current);
>     perf_event_init_task(p);
>       perf_event_free_task(p);
>         perf_event_exit_task_context(p);
>           WARN_ON_ONCE(task != current);
> 
> Another one:
> 
>   copy_process()
>     p = dup_task_struct(current);
>     ...
>     bad_fork_cleanup_perf:
>       perf_event_free_task(p);
>         perf_event_exit_task_context(p);
>           WARN_ON_ONCE(task != current);
> 
> Or am I missing something?

No, the perf_event_free_task() callchain has a problem.

I'll remove that WARN_ON_ONCE() since perf_event_exit_task() has the
same check. I'll do that in the merge patch, not this rename patch.
[PATCH v3a 6/7] perf: Rename perf_event_exit_task(.child)
Posted by Peter Zijlstra 9 months, 1 week ago

The task passed to perf_event_exit_task() is not a child, it is
current. Fix this confusing naming, since much of the rest of the code
also relies on it being current.

Specifically, both exec() and exit() callers use it with current as
the argument.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/events/core.c |   54 ++++++++++++++++++++++++++-------------------------
 1 file changed, 28 insertions(+), 26 deletions(-)

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -13551,8 +13551,8 @@ static void perf_event_exit_task_context
 	struct perf_event_context *child_ctx, *clone_ctx = NULL;
 	struct perf_event *child_event, *next;
 
-	child_ctx = perf_pin_task_context(child);
-	if (!child_ctx)
+	ctx = perf_pin_task_context(task);
+	if (!ctx)
 		return;
 
 	/*
@@ -13565,27 +13565,27 @@ static void perf_event_exit_task_context
 	 * without ctx::mutex (it cannot because of the move_group double mutex
 	 * lock thing). See the comments in perf_install_in_context().
 	 */
-	mutex_lock(&child_ctx->mutex);
+	mutex_lock(&ctx->mutex);
 
 	/*
 	 * In a single ctx::lock section, de-schedule the events and detach the
 	 * context from the task such that we cannot ever get it scheduled back
 	 * in.
 	 */
-	raw_spin_lock_irq(&child_ctx->lock);
-	task_ctx_sched_out(child_ctx, NULL, EVENT_ALL);
+	raw_spin_lock_irq(&ctx->lock);
+	task_ctx_sched_out(ctx, NULL, EVENT_ALL);
 
 	/*
 	 * Now that the context is inactive, destroy the task <-> ctx relation
 	 * and mark the context dead.
 	 */
-	RCU_INIT_POINTER(child->perf_event_ctxp, NULL);
-	put_ctx(child_ctx); /* cannot be last */
-	WRITE_ONCE(child_ctx->task, TASK_TOMBSTONE);
+	RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
+	put_ctx(ctx); /* cannot be last */
+	WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
 	put_task_struct(current); /* cannot be last */
 
-	clone_ctx = unclone_ctx(child_ctx);
-	raw_spin_unlock_irq(&child_ctx->lock);
+	clone_ctx = unclone_ctx(ctx);
+	raw_spin_unlock_irq(&ctx->lock);
 
 	if (clone_ctx)
 		put_ctx(clone_ctx);
@@ -13596,12 +13596,12 @@ static void perf_event_exit_task_context
 	 * get a few PERF_RECORD_READ events.
 	 */
 	if (exit)
-		perf_event_task(child, child_ctx, 0);
+		perf_event_task(task, ctx, 0);
 
-	list_for_each_entry_safe(child_event, next, &child_ctx->event_list, event_entry)
-		perf_event_exit_event(child_event, child_ctx);
+	list_for_each_entry_safe(child_event, next, &ctx->event_list, event_entry)
+		perf_event_exit_event(child_event, ctx);
 
-	mutex_unlock(&child_ctx->mutex);
+	mutex_unlock(&ctx->mutex);
 
 	if (!exit) {
 		/*
@@ -13617,24 +13617,26 @@ static void perf_event_exit_task_context
 		 *
 		 * Wait for all events to drop their context reference.
 		 */
-		wait_var_event(&child_ctx->refcount,
-			       refcount_read(&child_ctx->refcount) == 1);
+		wait_var_event(&ctx->refcount,
+			       refcount_read(&ctx->refcount) == 1);
 	}
-	put_ctx(child_ctx);
+	put_ctx(ctx);
 }
 
 /*
- * When a child task exits, feed back event values to parent events.
+ * When a task exits, feed back event values to parent events.
  *
  * Can be called with exec_update_lock held when called from
  * setup_new_exec().
  */
-void perf_event_exit_task(struct task_struct *child)
+void perf_event_exit_task(struct task_struct *task)
 {
 	struct perf_event *event, *tmp;
 
-	mutex_lock(&child->perf_event_mutex);
-	list_for_each_entry_safe(event, tmp, &child->perf_event_list,
+	WARN_ON_ONCE(task != current);
+
+	mutex_lock(&task->perf_event_mutex);
+	list_for_each_entry_safe(event, tmp, &task->perf_event_list,
 				 owner_entry) {
 		list_del_init(&event->owner_entry);
 
@@ -13645,17 +13647,17 @@ void perf_event_exit_task(struct task_st
 		 */
 		smp_store_release(&event->owner, NULL);
 	}
-	mutex_unlock(&child->perf_event_mutex);
+	mutex_unlock(&task->perf_event_mutex);
 
-	perf_event_exit_task_context(child, true);
+	perf_event_exit_task_context(task, true);
 
 	/*
 	 * The perf_event_exit_task_context calls perf_event_task
-	 * with child's task_ctx, which generates EXIT events for
-	 * child contexts and sets child->perf_event_ctxp[] to NULL.
+	 * with task's task_ctx, which generates EXIT events for
+	 * task contexts and sets task->perf_event_ctxp[] to NULL.
 	 * At this point we need to send EXIT events to cpu contexts.
 	 */
-	perf_event_task(child, NULL, 0);
+	perf_event_task(task, NULL, 0);
 }
 
 /*
Re: [PATCH v3a 6/7] perf: Rename perf_event_exit_task(.child)
Posted by Ravi Bangoria 9 months, 1 week ago
Hi Peter,

> The task passed to perf_event_exit_task() is not a child, it is
> current. Fix this confusing naming, since much of the rest of the code
> also relies on it being current.
> 
> Specifically, both exec() and exit() callers use it with current as
> the argument.

When perf_event_exit_task_context() gets called by perf_event_free_task():

1) The task_ctx_sched_out(ctx) call should be avoided, because 'ctx'
   belongs to the (half-baked) child task, whereas task_ctx_sched_out()
   expects 'ctx' to be the context of 'current'.
2) Similarly, the 'task' argument != 'current', so put_task_struct()
   must be passed 'task' rather than 'current'.

--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -13573,7 +13573,8 @@ static void perf_event_exit_task_context(struct task_struct *task, bool exit)
 	 * in.
 	 */
 	raw_spin_lock_irq(&ctx->lock);
-	task_ctx_sched_out(ctx, NULL, EVENT_ALL);
+	if (exit)
+		task_ctx_sched_out(ctx, NULL, EVENT_ALL);
 
 	/*
 	 * Now that the context is inactive, destroy the task <-> ctx relation
@@ -13582,7 +13583,7 @@ static void perf_event_exit_task_context(struct task_struct *task, bool exit)
 	RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
 	put_ctx(ctx); /* cannot be last */
 	WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
-	put_task_struct(current); /* cannot be last */
+	put_task_struct(task); /* cannot be last */
 
 	clone_ctx = unclone_ctx(ctx);
 	raw_spin_unlock_irq(&ctx->lock);

Thanks,
Ravi
Re: [PATCH v3a 6/7] perf: Rename perf_event_exit_task(.child)
Posted by Peter Zijlstra 9 months, 1 week ago
On Wed, Mar 12, 2025 at 12:01:00PM +0530, Ravi Bangoria wrote:
> Hi Peter,
> 
> > The task passed to perf_event_exit_task() is not a child, it is
> > current. Fix this confusing naming, since much of the rest of the code
> > also relies on it being current.
> > 
> > Specifically, both exec() and exit() callers use it with current as
> > the argument.
> 
> When perf_event_exit_task_context() gets called by perf_event_free_task():
> 
> 1) task_ctx_sched_out(ctx) function should be avoided because the 'ctx'
>    argument is of the (half baked)child task whereas task_ctx_sched_out()
>    expects 'ctx' to be the context of 'current'.
> 2) Similarly, 'task' argument != 'current'.
> 
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -13573,7 +13573,8 @@ static void perf_event_exit_task_context(struct task_struct *task, bool exit)
>  	 * in.
>  	 */
>  	raw_spin_lock_irq(&ctx->lock);
> -	task_ctx_sched_out(ctx, NULL, EVENT_ALL);
> +	if (exit)
> +		task_ctx_sched_out(ctx, NULL, EVENT_ALL);
>  
>  	/*
>  	 * Now that the context is inactive, destroy the task <-> ctx relation
> @@ -13582,7 +13583,7 @@ static void perf_event_exit_task_context(struct task_struct *task, bool exit)
>  	RCU_INIT_POINTER(task->perf_event_ctxp, NULL);
>  	put_ctx(ctx); /* cannot be last */
>  	WRITE_ONCE(ctx->task, TASK_TOMBSTONE);
> -	put_task_struct(current); /* cannot be last */
> +	put_task_struct(task); /* cannot be last */
>  
>  	clone_ctx = unclone_ctx(ctx);
>  	raw_spin_unlock_irq(&ctx->lock);

Right you are. Thanks!