[RFC PATCH 20/20] rcu: Use kthread preferred affinity for RCU exp kworkers

Frederic Weisbecker posted 20 patches 1 month, 3 weeks ago
[RFC PATCH 20/20] rcu: Use kthread preferred affinity for RCU exp kworkers
Posted by Frederic Weisbecker 1 month, 3 weeks ago
Now that kthreads have an infrastructure to handle preferred affinity
against CPU hotplug and housekeeping cpumask, convert RCU exp workers to
use it instead of handling all the constraints by itself.

Signed-off-by: Frederic Weisbecker <frederic@kernel.org>
---
 kernel/rcu/tree.c | 105 +++++++++-------------------------------------
 1 file changed, 19 insertions(+), 86 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index e038f4abb872..f3e40a1dea65 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -4777,6 +4777,22 @@ rcu_boot_init_percpu_data(int cpu)
 	rcu_boot_init_nocb_percpu_data(rdp);
 }
 
+static void rcu_thread_affine_rnp(struct task_struct *t, struct rcu_node *rnp)
+{
+	cpumask_var_t affinity;
+	int cpu;
+
+	if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
+		return;
+
+	for_each_leaf_node_possible_cpu(rnp, cpu)
+		cpumask_set_cpu(cpu, affinity);
+
+	kthread_affine_preferred(t, affinity);
+
+	free_cpumask_var(affinity);
+}
+
 struct kthread_worker *rcu_exp_gp_kworker;
 
 static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
@@ -4789,7 +4805,7 @@ static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
 	if (rnp->exp_kworker)
 		return;
 
-	kworker = kthread_run_worker(0, name, rnp_index);
+	kworker = kthread_create_worker(0, name, rnp_index);
 	if (IS_ERR_OR_NULL(kworker)) {
 		pr_err("Failed to create par gp kworker on %d/%d\n",
 		       rnp->grplo, rnp->grphi);
@@ -4799,16 +4815,9 @@ static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
 
 	if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD))
 		sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
-}
 
-static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp)
-{
-	struct kthread_worker *kworker = READ_ONCE(rnp->exp_kworker);
-
-	if (!kworker)
-		return NULL;
-
-	return kworker->task;
+	rcu_thread_affine_rnp(kworker->task, rnp);
+	wake_up_process(kworker->task);
 }
 
 static void __init rcu_start_exp_gp_kworker(void)
@@ -4893,79 +4902,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
 	return 0;
 }
 
-static void rcu_thread_affine_rnp(struct task_struct *t, struct rcu_node *rnp)
-{
-	cpumask_var_t affinity;
-	int cpu;
-
-	if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
-		return;
-
-	for_each_leaf_node_possible_cpu(rnp, cpu)
-		cpumask_set_cpu(cpu, affinity);
-
-	kthread_affine_preferred(t, affinity);
-
-	free_cpumask_var(affinity);
-}
-
-/*
- * Update kthreads affinity during CPU-hotplug changes.
- *
- * Set the per-rcu_node kthread's affinity to cover all CPUs that are
- * served by the rcu_node in question.  The CPU hotplug lock is still
- * held, so the value of rnp->qsmaskinit will be stable.
- *
- * We don't include outgoingcpu in the affinity set, use -1 if there is
- * no outgoing CPU.  If there are no CPUs left in the affinity set,
- * this function allows the kthread to execute on any CPU.
- *
- * Any future concurrent calls are serialized via ->kthread_mutex.
- */
-static void rcutree_affinity_setting(unsigned int cpu, int outgoingcpu)
-{
-	cpumask_var_t cm;
-	unsigned long mask;
-	struct rcu_data *rdp;
-	struct rcu_node *rnp;
-	struct task_struct *task_exp;
-
-	rdp = per_cpu_ptr(&rcu_data, cpu);
-	rnp = rdp->mynode;
-
-	task_exp = rcu_exp_par_gp_task(rnp);
-
-	/*
-	 * If CPU is the boot one, this task is created later from early
-	 * initcall since kthreadd must be created first.
-	 */
-	if (!task_exp)
-		return;
-
-	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
-		return;
-
-	mutex_lock(&rnp->kthread_mutex);
-	mask = rcu_rnp_online_cpus(rnp);
-	for_each_leaf_node_possible_cpu(rnp, cpu)
-		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
-		    cpu != outgoingcpu)
-			cpumask_set_cpu(cpu, cm);
-	cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
-	if (cpumask_empty(cm)) {
-		cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
-		if (outgoingcpu >= 0)
-			cpumask_clear_cpu(outgoingcpu, cm);
-	}
-
-	if (task_exp)
-		set_cpus_allowed_ptr(task_exp, cm);
-
-	mutex_unlock(&rnp->kthread_mutex);
-
-	free_cpumask_var(cm);
-}
-
 /*
  * Has the specified (known valid) CPU ever been fully online?
  */
@@ -4994,7 +4930,6 @@ int rcutree_online_cpu(unsigned int cpu)
 	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
 		return 0; /* Too early in boot for scheduler work. */
 	sync_sched_exp_online_cleanup(cpu);
-	rcutree_affinity_setting(cpu, -1);
 
 	// Stop-machine done, so allow nohz_full to disable tick.
 	tick_dep_clear(TICK_DEP_BIT_RCU);
@@ -5207,8 +5142,6 @@ int rcutree_offline_cpu(unsigned int cpu)
 	rnp->ffmask &= ~rdp->grpmask;
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 
-	rcutree_affinity_setting(cpu, cpu);
-
 	// nohz_full CPUs need the tick for stop-machine to work quickly
 	tick_dep_set(TICK_DEP_BIT_RCU);
 	return 0;
-- 
2.45.2
Re: [RFC PATCH 20/20] rcu: Use kthread preferred affinity for RCU exp kworkers
Posted by Paul E. McKenney 1 month, 3 weeks ago
On Fri, Jul 26, 2024 at 11:56:56PM +0200, Frederic Weisbecker wrote:
> Now that kthreads have an infrastructure to handle preferred affinity
> against CPU hotplug and housekeeping cpumask, convert RCU exp workers to
> use it instead of handling all the constraints by itself.
> 
> Signed-off-by: Frederic Weisbecker <frederic@kernel.org>

Nice consolidation of troublesome code!

For this one and 17/20:

Acked-by: Paul E. McKenney <paulmck@kernel.org>

> ---
>  kernel/rcu/tree.c | 105 +++++++++-------------------------------------
>  1 file changed, 19 insertions(+), 86 deletions(-)
> 
> diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
> index e038f4abb872..f3e40a1dea65 100644
> --- a/kernel/rcu/tree.c
> +++ b/kernel/rcu/tree.c
> @@ -4777,6 +4777,22 @@ rcu_boot_init_percpu_data(int cpu)
>  	rcu_boot_init_nocb_percpu_data(rdp);
>  }
>  
> +static void rcu_thread_affine_rnp(struct task_struct *t, struct rcu_node *rnp)
> +{
> +	cpumask_var_t affinity;
> +	int cpu;
> +
> +	if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
> +		return;
> +
> +	for_each_leaf_node_possible_cpu(rnp, cpu)
> +		cpumask_set_cpu(cpu, affinity);
> +
> +	kthread_affine_preferred(t, affinity);
> +
> +	free_cpumask_var(affinity);
> +}
> +
>  struct kthread_worker *rcu_exp_gp_kworker;
>  
>  static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
> @@ -4789,7 +4805,7 @@ static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
>  	if (rnp->exp_kworker)
>  		return;
>  
> -	kworker = kthread_run_worker(0, name, rnp_index);
> +	kworker = kthread_create_worker(0, name, rnp_index);
>  	if (IS_ERR_OR_NULL(kworker)) {
>  		pr_err("Failed to create par gp kworker on %d/%d\n",
>  		       rnp->grplo, rnp->grphi);
> @@ -4799,16 +4815,9 @@ static void rcu_spawn_exp_par_gp_kworker(struct rcu_node *rnp)
>  
>  	if (IS_ENABLED(CONFIG_RCU_EXP_KTHREAD))
>  		sched_setscheduler_nocheck(kworker->task, SCHED_FIFO, &param);
> -}
>  
> -static struct task_struct *rcu_exp_par_gp_task(struct rcu_node *rnp)
> -{
> -	struct kthread_worker *kworker = READ_ONCE(rnp->exp_kworker);
> -
> -	if (!kworker)
> -		return NULL;
> -
> -	return kworker->task;
> +	rcu_thread_affine_rnp(kworker->task, rnp);
> +	wake_up_process(kworker->task);
>  }
>  
>  static void __init rcu_start_exp_gp_kworker(void)
> @@ -4893,79 +4902,6 @@ int rcutree_prepare_cpu(unsigned int cpu)
>  	return 0;
>  }
>  
> -static void rcu_thread_affine_rnp(struct task_struct *t, struct rcu_node *rnp)
> -{
> -	cpumask_var_t affinity;
> -	int cpu;
> -
> -	if (!zalloc_cpumask_var(&affinity, GFP_KERNEL))
> -		return;
> -
> -	for_each_leaf_node_possible_cpu(rnp, cpu)
> -		cpumask_set_cpu(cpu, affinity);
> -
> -	kthread_affine_preferred(t, affinity);
> -
> -	free_cpumask_var(affinity);
> -}
> -
> -/*
> - * Update kthreads affinity during CPU-hotplug changes.
> - *
> - * Set the per-rcu_node kthread's affinity to cover all CPUs that are
> - * served by the rcu_node in question.  The CPU hotplug lock is still
> - * held, so the value of rnp->qsmaskinit will be stable.
> - *
> - * We don't include outgoingcpu in the affinity set, use -1 if there is
> - * no outgoing CPU.  If there are no CPUs left in the affinity set,
> - * this function allows the kthread to execute on any CPU.
> - *
> - * Any future concurrent calls are serialized via ->kthread_mutex.
> - */
> -static void rcutree_affinity_setting(unsigned int cpu, int outgoingcpu)
> -{
> -	cpumask_var_t cm;
> -	unsigned long mask;
> -	struct rcu_data *rdp;
> -	struct rcu_node *rnp;
> -	struct task_struct *task_exp;
> -
> -	rdp = per_cpu_ptr(&rcu_data, cpu);
> -	rnp = rdp->mynode;
> -
> -	task_exp = rcu_exp_par_gp_task(rnp);
> -
> -	/*
> -	 * If CPU is the boot one, this task is created later from early
> -	 * initcall since kthreadd must be created first.
> -	 */
> -	if (!task_exp)
> -		return;
> -
> -	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
> -		return;
> -
> -	mutex_lock(&rnp->kthread_mutex);
> -	mask = rcu_rnp_online_cpus(rnp);
> -	for_each_leaf_node_possible_cpu(rnp, cpu)
> -		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
> -		    cpu != outgoingcpu)
> -			cpumask_set_cpu(cpu, cm);
> -	cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
> -	if (cpumask_empty(cm)) {
> -		cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
> -		if (outgoingcpu >= 0)
> -			cpumask_clear_cpu(outgoingcpu, cm);
> -	}
> -
> -	if (task_exp)
> -		set_cpus_allowed_ptr(task_exp, cm);
> -
> -	mutex_unlock(&rnp->kthread_mutex);
> -
> -	free_cpumask_var(cm);
> -}
> -
>  /*
>   * Has the specified (known valid) CPU ever been fully online?
>   */
> @@ -4994,7 +4930,6 @@ int rcutree_online_cpu(unsigned int cpu)
>  	if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE)
>  		return 0; /* Too early in boot for scheduler work. */
>  	sync_sched_exp_online_cleanup(cpu);
> -	rcutree_affinity_setting(cpu, -1);
>  
>  	// Stop-machine done, so allow nohz_full to disable tick.
>  	tick_dep_clear(TICK_DEP_BIT_RCU);
> @@ -5207,8 +5142,6 @@ int rcutree_offline_cpu(unsigned int cpu)
>  	rnp->ffmask &= ~rdp->grpmask;
>  	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
>  
> -	rcutree_affinity_setting(cpu, cpu);
> -
>  	// nohz_full CPUs need the tick for stop-machine to work quickly
>  	tick_dep_set(TICK_DEP_BIT_RCU);
>  	return 0;
> -- 
> 2.45.2
>