[PATCH] exit: call add_device_randomness() without tasklist_lock

Mateusz Guzik posted 1 patch 1 year ago
kernel/exit.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)
[PATCH] exit: call add_device_randomness() without tasklist_lock
Posted by Mateusz Guzik 1 year ago
Parallel calls to add_device_randomness() contend in their own right.

The clone side aleady runs outside of tasklist_lock, which in turn means
any caller on the exit side extends the tasklist_lock hold time while
contending on the random-private lock.

Fixing this problem bumps thread creation/destruction rate by 4% on a
24 core vm.

Bench (plop into will-it-scale):
$ cat tests/threadspawn1.c

char *testcase_description = "Thread creation and teardown";

static void *worker(void *arg)
{
        return (NULL);
}

void testcase(unsigned long long *iterations, unsigned long nr)
{
        pthread_t thread;
        int error;

        while (1) {
                error = pthread_create(&thread, NULL, worker, NULL);
                assert(error == 0);
                error = pthread_join(thread, NULL);
                assert(error == 0);
                (*iterations)++;
        }
}

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---
 kernel/exit.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/kernel/exit.c b/kernel/exit.c
index 1dcddfe537ee..8a9ac55dc26e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -141,13 +141,14 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
 /*
  * This function expects the tasklist_lock write-locked.
  */
-static void __exit_signal(struct task_struct *tsk)
+static unsigned long long __exit_signal(struct task_struct *tsk)
 {
 	struct signal_struct *sig = tsk->signal;
 	bool group_dead = thread_group_leader(tsk);
 	struct sighand_struct *sighand;
 	struct tty_struct *tty;
 	u64 utime, stime;
+	unsigned long long randomness;
 
 	sighand = rcu_dereference_check(tsk->sighand,
 					lockdep_tasklist_lock_is_held());
@@ -174,8 +175,7 @@ static void __exit_signal(struct task_struct *tsk)
 			sig->curr_target = next_thread(tsk);
 	}
 
-	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
-			      sizeof(unsigned long long));
+	randomness = tsk->se.sum_exec_runtime;
 
 	/*
 	 * Accumulate here the counters for all threads as they die. We could
@@ -214,6 +214,8 @@ static void __exit_signal(struct task_struct *tsk)
 		flush_sigqueue(&sig->shared_pending);
 		tty_kref_put(tty);
 	}
+
+	return randomness;
 }
 
 static void delayed_put_task_struct(struct rcu_head *rhp)
@@ -242,6 +244,7 @@ void release_task(struct task_struct *p)
 	struct task_struct *leader;
 	struct pid *thread_pid;
 	int zap_leader;
+	unsigned long long randomness;
 repeat:
 	/* don't need to get the RCU readlock here - the process is dead and
 	 * can't be modifying its own credentials. But shut RCU-lockdep up */
@@ -254,7 +257,7 @@ void release_task(struct task_struct *p)
 	write_lock_irq(&tasklist_lock);
 	ptrace_release_task(p);
 	thread_pid = get_pid(p->thread_pid);
-	__exit_signal(p);
+	randomness = __exit_signal(p);
 
 	/*
 	 * If we are the last non-leader member of the thread
@@ -280,6 +283,8 @@ void release_task(struct task_struct *p)
 	put_pid(thread_pid);
 	release_thread(p);
 	put_task_struct_rcu_user(p);
+	add_device_randomness((const void*) &randomness,
+			      sizeof(unsigned long long));
 
 	p = leader;
 	if (unlikely(zap_leader))
-- 
2.43.0
Re: [PATCH] exit: call add_device_randomness() without tasklist_lock
Posted by Oleg Nesterov 1 year ago
On 01/28, Mateusz Guzik wrote:
>
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -141,13 +141,14 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
>  /*
>   * This function expects the tasklist_lock write-locked.
>   */
> -static void __exit_signal(struct task_struct *tsk)
> +static unsigned long long __exit_signal(struct task_struct *tsk)
>  {
>  	struct signal_struct *sig = tsk->signal;
>  	bool group_dead = thread_group_leader(tsk);
>  	struct sighand_struct *sighand;
>  	struct tty_struct *tty;
>  	u64 utime, stime;
> +	unsigned long long randomness;
>  
>  	sighand = rcu_dereference_check(tsk->sighand,
>  					lockdep_tasklist_lock_is_held());
> @@ -174,8 +175,7 @@ static void __exit_signal(struct task_struct *tsk)
>  			sig->curr_target = next_thread(tsk);
>  	}
>  
> -	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
> -			      sizeof(unsigned long long));
> +	randomness = tsk->se.sum_exec_runtime;
>  
>  	/*
>  	 * Accumulate here the counters for all threads as they die. We could
> @@ -214,6 +214,8 @@ static void __exit_signal(struct task_struct *tsk)
>  		flush_sigqueue(&sig->shared_pending);
>  		tty_kref_put(tty);
>  	}
> +
> +	return randomness;
>  }
>  
>  static void delayed_put_task_struct(struct rcu_head *rhp)
> @@ -242,6 +244,7 @@ void release_task(struct task_struct *p)
>  	struct task_struct *leader;
>  	struct pid *thread_pid;
>  	int zap_leader;
> +	unsigned long long randomness;
>  repeat:
>  	/* don't need to get the RCU readlock here - the process is dead and
>  	 * can't be modifying its own credentials. But shut RCU-lockdep up */
> @@ -254,7 +257,7 @@ void release_task(struct task_struct *p)
>  	write_lock_irq(&tasklist_lock);
>  	ptrace_release_task(p);
>  	thread_pid = get_pid(p->thread_pid);
> -	__exit_signal(p);
> +	randomness = __exit_signal(p);
>  
>  	/*
>  	 * If we are the last non-leader member of the thread
> @@ -280,6 +283,8 @@ void release_task(struct task_struct *p)
>  	put_pid(thread_pid);
>  	release_thread(p);
>  	put_task_struct_rcu_user(p);
> +	add_device_randomness((const void*) &randomness,
> +			      sizeof(unsigned long long));
>  
>  	p = leader;
>  	if (unlikely(zap_leader))

Hmm, why not simply shift add_device_randomness(sum_exec_runtime) to
release_release_task() ?

Oleg.

--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -174,9 +174,6 @@ static void __exit_signal(struct task_struct *tsk)
 			sig->curr_target = next_thread(tsk);
 	}
 
-	add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
-			      sizeof(unsigned long long));
-
 	/*
 	 * Accumulate here the counters for all threads as they die. We could
 	 * skip the group leader because it is the last user of signal_struct,
@@ -279,6 +276,8 @@ void release_task(struct task_struct *p)
 	proc_flush_pid(thread_pid);
 	put_pid(thread_pid);
 	release_thread(p);
+	add_device_randomness((const void*)&p->se.sum_exec_runtime,
+			      sizeof(unsigned long long));
 	put_task_struct_rcu_user(p);
 
 	p = leader;
Re: [PATCH] exit: call add_device_randomness() without tasklist_lock
Posted by Mateusz Guzik 1 year ago
On Tue, Jan 28, 2025 at 4:21 PM Oleg Nesterov <oleg@redhat.com> wrote:
>
> On 01/28, Mateusz Guzik wrote:
> >
> > --- a/kernel/exit.c
> > +++ b/kernel/exit.c
> > @@ -141,13 +141,14 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
> >  /*
> >   * This function expects the tasklist_lock write-locked.
> >   */
> > -static void __exit_signal(struct task_struct *tsk)
> > +static unsigned long long __exit_signal(struct task_struct *tsk)
> >  {
> >       struct signal_struct *sig = tsk->signal;
> >       bool group_dead = thread_group_leader(tsk);
> >       struct sighand_struct *sighand;
> >       struct tty_struct *tty;
> >       u64 utime, stime;
> > +     unsigned long long randomness;
> >
> >       sighand = rcu_dereference_check(tsk->sighand,
> >                                       lockdep_tasklist_lock_is_held());
> > @@ -174,8 +175,7 @@ static void __exit_signal(struct task_struct *tsk)
> >                       sig->curr_target = next_thread(tsk);
> >       }
> >
> > -     add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
> > -                           sizeof(unsigned long long));
> > +     randomness = tsk->se.sum_exec_runtime;
> >
> >       /*
> >        * Accumulate here the counters for all threads as they die. We could
> > @@ -214,6 +214,8 @@ static void __exit_signal(struct task_struct *tsk)
> >               flush_sigqueue(&sig->shared_pending);
> >               tty_kref_put(tty);
> >       }
> > +
> > +     return randomness;
> >  }
> >
> >  static void delayed_put_task_struct(struct rcu_head *rhp)
> > @@ -242,6 +244,7 @@ void release_task(struct task_struct *p)
> >       struct task_struct *leader;
> >       struct pid *thread_pid;
> >       int zap_leader;
> > +     unsigned long long randomness;
> >  repeat:
> >       /* don't need to get the RCU readlock here - the process is dead and
> >        * can't be modifying its own credentials. But shut RCU-lockdep up */
> > @@ -254,7 +257,7 @@ void release_task(struct task_struct *p)
> >       write_lock_irq(&tasklist_lock);
> >       ptrace_release_task(p);
> >       thread_pid = get_pid(p->thread_pid);
> > -     __exit_signal(p);
> > +     randomness = __exit_signal(p);
> >
> >       /*
> >        * If we are the last non-leader member of the thread
> > @@ -280,6 +283,8 @@ void release_task(struct task_struct *p)
> >       put_pid(thread_pid);
> >       release_thread(p);
> >       put_task_struct_rcu_user(p);
> > +     add_device_randomness((const void*) &randomness,
> > +                           sizeof(unsigned long long));
> >
> >       p = leader;
> >       if (unlikely(zap_leader))
>
> Hmm, why not simply shift add_device_randomness(sum_exec_runtime) to
> release_release_task() ?
>

I wanted to keep the load where it was, but see my follow up. :)


-- 
Mateusz Guzik <mjguzik gmail.com>
Re: [PATCH] exit: call add_device_randomness() without tasklist_lock
Posted by Mateusz Guzik 1 year ago
scratch it. on second look more work can be easily pulled out of the
lock (notably pid freeing, which also contends vs clone). i'm going to
send a new patch when I get around to it.

On Tue, Jan 28, 2025 at 3:07 PM Mateusz Guzik <mjguzik@gmail.com> wrote:
>
> Parallel calls to add_device_randomness() contend in their own right.
>
> The clone side aleady runs outside of tasklist_lock, which in turn means
> any caller on the exit side extends the tasklist_lock hold time while
> contending on the random-private lock.
>
> Fixing this problem bumps thread creation/destruction rate by 4% on a
> 24 core vm.
>
> Bench (plop into will-it-scale):
> $ cat tests/threadspawn1.c
>
> char *testcase_description = "Thread creation and teardown";
>
> static void *worker(void *arg)
> {
>         return (NULL);
> }
>
> void testcase(unsigned long long *iterations, unsigned long nr)
> {
>         pthread_t thread;
>         int error;
>
>         while (1) {
>                 error = pthread_create(&thread, NULL, worker, NULL);
>                 assert(error == 0);
>                 error = pthread_join(thread, NULL);
>                 assert(error == 0);
>                 (*iterations)++;
>         }
> }
>
> Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
> ---
>  kernel/exit.c | 13 +++++++++----
>  1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/exit.c b/kernel/exit.c
> index 1dcddfe537ee..8a9ac55dc26e 100644
> --- a/kernel/exit.c
> +++ b/kernel/exit.c
> @@ -141,13 +141,14 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
>  /*
>   * This function expects the tasklist_lock write-locked.
>   */
> -static void __exit_signal(struct task_struct *tsk)
> +static unsigned long long __exit_signal(struct task_struct *tsk)
>  {
>         struct signal_struct *sig = tsk->signal;
>         bool group_dead = thread_group_leader(tsk);
>         struct sighand_struct *sighand;
>         struct tty_struct *tty;
>         u64 utime, stime;
> +       unsigned long long randomness;
>
>         sighand = rcu_dereference_check(tsk->sighand,
>                                         lockdep_tasklist_lock_is_held());
> @@ -174,8 +175,7 @@ static void __exit_signal(struct task_struct *tsk)
>                         sig->curr_target = next_thread(tsk);
>         }
>
> -       add_device_randomness((const void*) &tsk->se.sum_exec_runtime,
> -                             sizeof(unsigned long long));
> +       randomness = tsk->se.sum_exec_runtime;
>
>         /*
>          * Accumulate here the counters for all threads as they die. We could
> @@ -214,6 +214,8 @@ static void __exit_signal(struct task_struct *tsk)
>                 flush_sigqueue(&sig->shared_pending);
>                 tty_kref_put(tty);
>         }
> +
> +       return randomness;
>  }
>
>  static void delayed_put_task_struct(struct rcu_head *rhp)
> @@ -242,6 +244,7 @@ void release_task(struct task_struct *p)
>         struct task_struct *leader;
>         struct pid *thread_pid;
>         int zap_leader;
> +       unsigned long long randomness;
>  repeat:
>         /* don't need to get the RCU readlock here - the process is dead and
>          * can't be modifying its own credentials. But shut RCU-lockdep up */
> @@ -254,7 +257,7 @@ void release_task(struct task_struct *p)
>         write_lock_irq(&tasklist_lock);
>         ptrace_release_task(p);
>         thread_pid = get_pid(p->thread_pid);
> -       __exit_signal(p);
> +       randomness = __exit_signal(p);
>
>         /*
>          * If we are the last non-leader member of the thread
> @@ -280,6 +283,8 @@ void release_task(struct task_struct *p)
>         put_pid(thread_pid);
>         release_thread(p);
>         put_task_struct_rcu_user(p);
> +       add_device_randomness((const void*) &randomness,
> +                             sizeof(unsigned long long));
>
>         p = leader;
>         if (unlikely(zap_leader))
> --
> 2.43.0
>


-- 
Mateusz Guzik <mjguzik gmail.com>