[PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)

Oleg Nesterov posted 1 patch 8 months, 1 week ago
kernel/exit.c | 7 ++-----
1 file changed, 2 insertions(+), 5 deletions(-)
[PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Oleg Nesterov 8 months, 1 week ago
After the commit 7903f907a2260 ("pid: perform free_pid() calls outside
of tasklist_lock") __unhash_process() -> detach_pid() no longer calls
free_pid(), proc_flush_pid() can just use p->thread_pid without the
now pointless get_pid() + put_pid().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/exit.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 1b51dc099f1e..96d639383f86 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -239,7 +239,6 @@ void release_task(struct task_struct *p)
 {
 	struct release_task_post post;
 	struct task_struct *leader;
-	struct pid *thread_pid;
 	int zap_leader;
 repeat:
 	memset(&post, 0, sizeof(post));
@@ -253,8 +252,6 @@ void release_task(struct task_struct *p)
 	pidfs_exit(p);
 	cgroup_release(p);
 
-	thread_pid = get_pid(p->thread_pid);
-
 	write_lock_irq(&tasklist_lock);
 	ptrace_release_task(p);
 	__exit_signal(&post, p);
@@ -282,8 +279,8 @@ void release_task(struct task_struct *p)
 	}
 
 	write_unlock_irq(&tasklist_lock);
-	proc_flush_pid(thread_pid);
-	put_pid(thread_pid);
+	/* p->thread_pid can't go away until free_pids() below */
+	proc_flush_pid(p->thread_pid);
 	add_device_randomness(&p->se.sum_exec_runtime,
 			      sizeof(p->se.sum_exec_runtime));
 	free_pids(post.pids);
-- 
2.25.1.362.g51ebf55
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Christian Brauner 8 months, 1 week ago
On Fri, Apr 11, 2025 at 02:18:57PM +0200, Oleg Nesterov wrote:
> After the commit 7903f907a2260 ("pid: perform free_pid() calls outside
> of tasklist_lock") __unhash_process() -> detach_pid() no longer calls
> free_pid(), proc_flush_pid() can just use p->thread_pid without the
> now pointless get_pid() + put_pid().
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  kernel/exit.c | 7 ++-----
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 1b51dc099f1e..96d639383f86 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -239,7 +239,6 @@ void release_task(struct task_struct *p)
>  {
>  	struct release_task_post post;
>  	struct task_struct *leader;
> -	struct pid *thread_pid;
>  	int zap_leader;
>  repeat:
>  	memset(&post, 0, sizeof(post));
> @@ -253,8 +252,6 @@ void release_task(struct task_struct *p)
>  	pidfs_exit(p);
>  	cgroup_release(p);
>  
> -	thread_pid = get_pid(p->thread_pid);
> -
>  	write_lock_irq(&tasklist_lock);
>  	ptrace_release_task(p);
>  	__exit_signal(&post, p);
> @@ -282,8 +279,8 @@ void release_task(struct task_struct *p)
>  	}
>  
>  	write_unlock_irq(&tasklist_lock);
> -	proc_flush_pid(thread_pid);
> -	put_pid(thread_pid);
> +	/* p->thread_pid can't go away until free_pids() below */
> +	proc_flush_pid(p->thread_pid);

This cannot work though, right?
Because after __unhash_process() p->thread_pid may be NULL:

__unhash_process()
-> detach_pid()
   -> __change_pid()
      {
	struct pid **pid_ptr, *pid;

	pid_ptr = task_pid_ptr(task, type);

	*pid_ptr = NULL;

	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
		if (pid_has_task(pid, tmp)) /* will be false if @group_dead is true
			return;

	WARN_ON(pids[type]);
	pids[type] = pid;
      }

so this needs:

diff --git a/kernel/exit.c b/kernel/exit.c
index e6132ebdaed4..9232c4c684e9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -244,6 +244,7 @@ void release_task(struct task_struct *p)
 {
        struct release_task_post post;
        struct task_struct *leader;
+       struct pid *thread_pid = task_pid(p);
        int zap_leader;
 repeat:
        memset(&post, 0, sizeof(post));
@@ -285,7 +286,7 @@ void release_task(struct task_struct *p)

        write_unlock_irq(&tasklist_lock);
        /* p->thread_pid can't go away until free_pids() below */
-       proc_flush_pid(p->thread_pid);
+       proc_flush_pid(thread_pid);
        add_device_randomness(&p->se.sum_exec_runtime,
                              sizeof(p->se.sum_exec_runtime));
        free_pids(post.pids);

I've folded this diff into your patch, Oleg. Let me know if you see any
additional issues with this.
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Christian Brauner 8 months, 1 week ago
On Mon, Apr 14, 2025 at 09:39:47PM +0200, Christian Brauner wrote:
> On Fri, Apr 11, 2025 at 02:18:57PM +0200, Oleg Nesterov wrote:
> > After the commit 7903f907a2260 ("pid: perform free_pid() calls outside
> > of tasklist_lock") __unhash_process() -> detach_pid() no longer calls
> > free_pid(), proc_flush_pid() can just use p->thread_pid without the
> > now pointless get_pid() + put_pid().
> > 
> > Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> > ---
> >  kernel/exit.c | 7 ++-----
> >  1 file changed, 2 insertions(+), 5 deletions(-)
> > 
> > diff --git a/kernel/exit.c b/kernel/exit.c
> > index 1b51dc099f1e..96d639383f86 100644
> > --- a/kernel/exit.c
> > +++ b/kernel/exit.c
> > @@ -239,7 +239,6 @@ void release_task(struct task_struct *p)
> >  {
> >  	struct release_task_post post;
> >  	struct task_struct *leader;
> > -	struct pid *thread_pid;
> >  	int zap_leader;
> >  repeat:
> >  	memset(&post, 0, sizeof(post));
> > @@ -253,8 +252,6 @@ void release_task(struct task_struct *p)
> >  	pidfs_exit(p);
> >  	cgroup_release(p);
> >  
> > -	thread_pid = get_pid(p->thread_pid);
> > -
> >  	write_lock_irq(&tasklist_lock);
> >  	ptrace_release_task(p);
> >  	__exit_signal(&post, p);
> > @@ -282,8 +279,8 @@ void release_task(struct task_struct *p)
> >  	}
> >  
> >  	write_unlock_irq(&tasklist_lock);
> > -	proc_flush_pid(thread_pid);
> > -	put_pid(thread_pid);
> > +	/* p->thread_pid can't go away until free_pids() below */
> > +	proc_flush_pid(p->thread_pid);
> 
> This cannot work though, right?
> Because after __unhash_process() p->thread_pid may be NULL:
> 
> __unhash_process()
> -> detach_pid()
>    -> __change_pid()
>       {
> 	struct pid **pid_ptr, *pid;
> 
> 	pid_ptr = task_pid_ptr(task, type);
> 
> 	*pid_ptr = NULL;
> 
> 	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
> 		if (pid_has_task(pid, tmp)) /* will be false if @group_dead is true
> 			return;
> 
> 	WARN_ON(pids[type]);
> 	pids[type] = pid;
>       }
> 
> so this needs:
> 
> diff --git a/kernel/exit.c b/kernel/exit.c
> index e6132ebdaed4..9232c4c684e9 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -244,6 +244,7 @@ void release_task(struct task_struct *p)
>  {
>         struct release_task_post post;
>         struct task_struct *leader;
> +       struct pid *thread_pid = task_pid(p);
>         int zap_leader;
>  repeat:
>         memset(&post, 0, sizeof(post));
> @@ -285,7 +286,7 @@ void release_task(struct task_struct *p)
> 
>         write_unlock_irq(&tasklist_lock);
>         /* p->thread_pid can't go away until free_pids() below */
> -       proc_flush_pid(p->thread_pid);
> +       proc_flush_pid(thread_pid);
>         add_device_randomness(&p->se.sum_exec_runtime,
>                               sizeof(p->se.sum_exec_runtime));
>         free_pids(post.pids);
> 
> I've folded this diff into your patch, Oleg. Let me know if you see any
> additional issues with this.

The task_pid() needs to be moved after the repeat label. I'm appending
the full patch I applied.
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Oleg Nesterov 8 months, 1 week ago
On 04/14, Christian Brauner wrote:
>
> On Mon, Apr 14, 2025 at 09:39:47PM +0200, Christian Brauner wrote:
> > On Fri, Apr 11, 2025 at 02:18:57PM +0200, Oleg Nesterov wrote:
> > > -	put_pid(thread_pid);
> > > +	/* p->thread_pid can't go away until free_pids() below */
> > > +	proc_flush_pid(p->thread_pid);
> >
> > This cannot work though, right?
> > Because after __unhash_process() p->thread_pid may be NULL:

Oh, indeed! What was I thinking about???

And, as you can guess, I didn't even bother to test this "obvious" cleanup :/

> The task_pid() needs to be moved after the repeat label. I'm appending
> the full patch I applied.

Thanks a lot!

Can you add your Co-developed-by or Fixed-by ?

Oleg.
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Mateusz Guzik 8 months, 1 week ago
On Mon, Apr 14, 2025 at 9:45 PM Christian Brauner <brauner@kernel.org> wrote:
>
> On Mon, Apr 14, 2025 at 09:39:47PM +0200, Christian Brauner wrote:
> > On Fri, Apr 11, 2025 at 02:18:57PM +0200, Oleg Nesterov wrote:
> > > After the commit 7903f907a2260 ("pid: perform free_pid() calls outside
> > > of tasklist_lock") __unhash_process() -> detach_pid() no longer calls
> > > free_pid(), proc_flush_pid() can just use p->thread_pid without the
> > > now pointless get_pid() + put_pid().
> > >
> > > Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> > > ---
> > >  kernel/exit.c | 7 ++-----
> > >  1 file changed, 2 insertions(+), 5 deletions(-)
> > >
> > > diff --git a/kernel/exit.c b/kernel/exit.c
> > > index 1b51dc099f1e..96d639383f86 100644
> > > --- a/kernel/exit.c
> > > +++ b/kernel/exit.c
> > > @@ -239,7 +239,6 @@ void release_task(struct task_struct *p)
> > >  {
> > >     struct release_task_post post;
> > >     struct task_struct *leader;
> > > -   struct pid *thread_pid;
> > >     int zap_leader;
> > >  repeat:
> > >     memset(&post, 0, sizeof(post));
> > > @@ -253,8 +252,6 @@ void release_task(struct task_struct *p)
> > >     pidfs_exit(p);
> > >     cgroup_release(p);
> > >
> > > -   thread_pid = get_pid(p->thread_pid);
> > > -
> > >     write_lock_irq(&tasklist_lock);
> > >     ptrace_release_task(p);
> > >     __exit_signal(&post, p);
> > > @@ -282,8 +279,8 @@ void release_task(struct task_struct *p)
> > >     }
> > >
> > >     write_unlock_irq(&tasklist_lock);
> > > -   proc_flush_pid(thread_pid);
> > > -   put_pid(thread_pid);
> > > +   /* p->thread_pid can't go away until free_pids() below */
> > > +   proc_flush_pid(p->thread_pid);
> >
> > This cannot work though, right?
> > Because after __unhash_process() p->thread_pid may be NULL:
> >
> > __unhash_process()
> > -> detach_pid()
> >    -> __change_pid()
> >       {
> >       struct pid **pid_ptr, *pid;
> >
> >       pid_ptr = task_pid_ptr(task, type);
> >
> >       *pid_ptr = NULL;
> >
> >       for (tmp = PIDTYPE_MAX; --tmp >= 0; )
> >               if (pid_has_task(pid, tmp)) /* will be false if @group_dead is true
> >                       return;
> >
> >       WARN_ON(pids[type]);
> >       pids[type] = pid;
> >       }
> >
> > so this needs:
> >
> > diff --git a/kernel/exit.c b/kernel/exit.c
> > index e6132ebdaed4..9232c4c684e9 100644
> > --- a/kernel/exit.c
> > +++ b/kernel/exit.c
> > @@ -244,6 +244,7 @@ void release_task(struct task_struct *p)
> >  {
> >         struct release_task_post post;
> >         struct task_struct *leader;
> > +       struct pid *thread_pid = task_pid(p);
> >         int zap_leader;
> >  repeat:
> >         memset(&post, 0, sizeof(post));
> > @@ -285,7 +286,7 @@ void release_task(struct task_struct *p)
> >
> >         write_unlock_irq(&tasklist_lock);
> >         /* p->thread_pid can't go away until free_pids() below */
> > -       proc_flush_pid(p->thread_pid);
> > +       proc_flush_pid(thread_pid);
> >         add_device_randomness(&p->se.sum_exec_runtime,
> >                               sizeof(p->se.sum_exec_runtime));
> >         free_pids(post.pids);
> >
> > I've folded this diff into your patch, Oleg. Let me know if you see any
> > additional issues with this.
>
> The task_pid() needs to be moved after the repeat label. I'm appending
> the full patch I applied.

oh heh, ack on that

but while here perhaps a small stylistic cleanup: move
add_device_randomness before or after proc_flush_pid + free_pids,
instead of if being in-between

-- 
Mateusz Guzik <mjguzik gmail.com>
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Oleg Nesterov 8 months, 1 week ago
On 04/14, Mateusz Guzik wrote:
>
> > The task_pid() needs to be moved after the repeat label. I'm appending
> > the full patch I applied.
>
> oh heh, ack on that

Thanks...

> but while here perhaps a small stylistic cleanup: move
> add_device_randomness before or after proc_flush_pid + free_pids,
> instead of if being in-between

Agreed, I thought about this too from the very beginning.

I'd prefer to move add_device_randomness() after release_thread(),
but perhaps this needs another minor cleanup?

Oleg.
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Mateusz Guzik 8 months, 1 week ago
On Mon, Apr 14, 2025 at 10:46 PM Oleg Nesterov <oleg@redhat.com> wrote:
> I'd prefer to move add_device_randomness() after release_thread(),
> but perhaps this needs another minor cleanup?
>

I have no opinion on that front. As far as cosmetics go I would not
touch it past the nit I mentioned in my previous e-mail, but I'm not
going to protest any other changes.

imo the real thing to do concerning the routine is to figure out if
the call is even of any benefit -- it very well may be this is call is
only there because nobody with random-fu bothered to remove it.
Personally I have epsilon knowledge of that area, so I'm not even
going to try to evaluate it. But it would be nice if someone(tm)
reached out to random folk concerning it. Worst case, even if it still
has to be there, maybe the sucker can trylock and bail. It does
contribute to contention, likely for no good reason.

-- 
Mateusz Guzik <mjguzik gmail.com>
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Christian Brauner 8 months, 1 week ago
On Mon, Apr 14, 2025 at 11:26:35PM +0200, Mateusz Guzik wrote:
> On Mon, Apr 14, 2025 at 10:46 PM Oleg Nesterov <oleg@redhat.com> wrote:
> > I'd prefer to move add_device_randomness() after release_thread(),
> > but perhaps this needs another minor cleanup?
> >
> 
> I have no opinion on that front. As far as cosmetics go I would not
> touch it past the nit I mentioned in my previous e-mail, but I'm not
> going to protest any other changes.
> 
> imo the real thing to do concerning the routine is to figure out if
> the call is even of any benefit -- it very well may be this is call is
> only there because nobody with random-fu bothered to remove it.
> Personally I have epsilon knowledge of that area, so I'm not even
> going to try to evaluate it. But it would be nice if someone(tm)
> reached out to random folk concerning it. Worst case, even if it still
> has to be there, maybe the sucker can trylock and bail. It does
> contribute to contention, likely for no good reason.

Afaict, add_device_randomness() cannot block. So why can't we just move
this into put_task_struct_rcu_user() (or some variant that's local to
kernel/exit.c)? I see no reason why this would need to be done
synchronously?
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Mateusz Guzik 8 months, 1 week ago
On Tue, Apr 15, 2025 at 9:35 AM Christian Brauner <brauner@kernel.org> wrote:
>
> On Mon, Apr 14, 2025 at 11:26:35PM +0200, Mateusz Guzik wrote:
> > On Mon, Apr 14, 2025 at 10:46 PM Oleg Nesterov <oleg@redhat.com> wrote:
> > > I'd prefer to move add_device_randomness() after release_thread(),
> > > but perhaps this needs another minor cleanup?
> > >
> >
> > I have no opinion on that front. As far as cosmetics go I would not
> > touch it past the nit I mentioned in my previous e-mail, but I'm not
> > going to protest any other changes.
> >
> > imo the real thing to do concerning the routine is to figure out if
> > the call is even of any benefit -- it very well may be this is call is
> > only there because nobody with random-fu bothered to remove it.
> > Personally I have epsilon knowledge of that area, so I'm not even
> > going to try to evaluate it. But it would be nice if someone(tm)
> > reached out to random folk concerning it. Worst case, even if it still
> > has to be there, maybe the sucker can trylock and bail. It does
> > contribute to contention, likely for no good reason.
>
> Afaict, add_device_randomness() cannot block. So why can't we just move
> this into put_task_struct_rcu_user() (or some variant that's local to
> kernel/exit.c)? I see no reason why this would need to be done
> synchronously?

It can move anywhere as long as ->se.sum_exec_runtime is not scrambled.

I'm not going to argue where it should land. I do note chances are the
call can be straight up removed and if I was looking to shuffle it
around some more, I would first try to find out if it can in fact just
go. I don't believe it warrants any effort at this stage though.

That said, apart from this remark I'm not going to argue about any placement.
-- 
Mateusz Guzik <mjguzik gmail.com>
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Christian Brauner 8 months, 1 week ago
On Fri, 11 Apr 2025 14:18:57 +0200, Oleg Nesterov wrote:
> After the commit 7903f907a2260 ("pid: perform free_pid() calls outside
> of tasklist_lock") __unhash_process() -> detach_pid() no longer calls
> free_pid(), proc_flush_pid() can just use p->thread_pid without the
> now pointless get_pid() + put_pid().
> 
> 

Applied to the vfs-6.16.pidfs branch of the vfs/vfs.git tree.
Patches in the vfs-6.16.pidfs branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review to the original patch series allowing us to drop it.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-6.16.pidfs

[1/1] release_task: kill the no longer needed get/put_pid(thread_pid)
      https://git.kernel.org/vfs/vfs/c/c9e3b2f77268
Re: [PATCH] release_task: kill the no longer needed get/put_pid(thread_pid)
Posted by Mateusz Guzik 8 months, 1 week ago
On Fri, Apr 11, 2025 at 2:19 PM Oleg Nesterov <oleg@redhat.com> wrote:
>
> After the commit 7903f907a2260 ("pid: perform free_pid() calls outside
> of tasklist_lock") __unhash_process() -> detach_pid() no longer calls
> free_pid(), proc_flush_pid() can just use p->thread_pid without the
> now pointless get_pid() + put_pid().
>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  kernel/exit.c | 7 ++-----
>  1 file changed, 2 insertions(+), 5 deletions(-)
>
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 1b51dc099f1e..96d639383f86 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -239,7 +239,6 @@ void release_task(struct task_struct *p)
>  {
>         struct release_task_post post;
>         struct task_struct *leader;
> -       struct pid *thread_pid;
>         int zap_leader;
>  repeat:
>         memset(&post, 0, sizeof(post));
> @@ -253,8 +252,6 @@ void release_task(struct task_struct *p)
>         pidfs_exit(p);
>         cgroup_release(p);
>
> -       thread_pid = get_pid(p->thread_pid);
> -
>         write_lock_irq(&tasklist_lock);
>         ptrace_release_task(p);
>         __exit_signal(&post, p);
> @@ -282,8 +279,8 @@ void release_task(struct task_struct *p)
>         }
>
>         write_unlock_irq(&tasklist_lock);
> -       proc_flush_pid(thread_pid);
> -       put_pid(thread_pid);
> +       /* p->thread_pid can't go away until free_pids() below */
> +       proc_flush_pid(p->thread_pid);
>         add_device_randomness(&p->se.sum_exec_runtime,
>                               sizeof(p->se.sum_exec_runtime));
>         free_pids(post.pids);

I'm trying to remember why I did not just remove it.

Interestingly I see my v2 *did* do the same thing:
https://lore.kernel.org/all/20250128160743.3142544-1-mjguzik@gmail.com/

+ proc_flush_pid(p->thread_pid);

I guess it fell through the cracks during reworks, shit happens.

that said:
Reviewed-by: Mateusz Guzik <mjguzik@gmail.com>

thanks

-- 
Mateusz Guzik <mjguzik gmail.com>