[PATCH v9 5/7] sched/fair: Use the same cpumask per-PD throughout find_energy_efficient_cpu()

Vincent Donnefort posted 7 patches 3 years, 8 months ago
There is a newer version of this series
[PATCH v9 5/7] sched/fair: Use the same cpumask per-PD throughout find_energy_efficient_cpu()
Posted by Vincent Donnefort 3 years, 8 months ago
From: Dietmar Eggemann <dietmar.eggemann@arm.com>

The Perf Domain (PD) cpumask (struct em_perf_domain.cpus) stays
invariant after Energy Model creation, i.e. it is not updated after
CPU hotplug operations.

That's why the PD mask is used in conjunction with the cpu_online_mask
(or Sched Domain cpumask). Thereby the cpu_online_mask is fetched
multiple times (in compute_energy()) during a run-queue selection
for a task.

cpu_online_mask may change during this time which can lead to wrong
energy calculations.

To be able to avoid this, use the select_rq_mask per-cpu cpumask to
create a cpumask out of PD cpumask and cpu_online_mask and pass it
through the function calls of the EAS run-queue selection path.

The PD cpumask for max_spare_cap_cpu/compute_prev_delta selection
(find_energy_efficient_cpu()) is now ANDed not only with the SD mask
but also with the cpu_online_mask. This is fine since this cpumask
has to be in sync with the one used for energy computation
(compute_energy()).
An exclusive cpuset setup with at least one asymmetric CPU capacity
island (hence the additional AND with the SD cpumask) is the obvious
exception here.

Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2d7bba2f1da2..57074f27c0d2 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6700,14 +6700,14 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
  * task.
  */
 static long
-compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
+compute_energy(struct task_struct *p, int dst_cpu, struct cpumask *cpus,
+	       struct perf_domain *pd)
 {
-	struct cpumask *pd_mask = perf_domain_span(pd);
 	unsigned long max_util = 0, sum_util = 0, cpu_cap;
 	int cpu;
 
-	cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
-	cpu_cap -= arch_scale_thermal_pressure(cpumask_first(pd_mask));
+	cpu_cap = arch_scale_cpu_capacity(cpumask_first(cpus));
+	cpu_cap -= arch_scale_thermal_pressure(cpumask_first(cpus));
 
 	/*
 	 * The capacity state of CPUs of the current rd can be driven by CPUs
@@ -6718,7 +6718,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
 	 * If an entire pd is outside of the current rd, it will not appear in
 	 * its pd list and will not be accounted by compute_energy().
 	 */
-	for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
+	for_each_cpu(cpu, cpus) {
 		unsigned long util_freq = cpu_util_next(cpu, p, dst_cpu);
 		unsigned long cpu_util, util_running = util_freq;
 		struct task_struct *tsk = NULL;
@@ -6805,6 +6805,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
  */
 static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 {
+	struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
 	unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
 	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
 	int cpu, best_energy_cpu = prev_cpu, target = -1;
@@ -6839,7 +6840,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 		unsigned long base_energy_pd;
 		int max_spare_cap_cpu = -1;
 
-		for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
+		cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
+
+		for_each_cpu_and(cpu, cpus, sched_domain_span(sd)) {
 			if (!cpumask_test_cpu(cpu, p->cpus_ptr))
 				continue;
 
@@ -6876,12 +6879,12 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 			continue;
 
 		/* Compute the 'base' energy of the pd, without @p */
-		base_energy_pd = compute_energy(p, -1, pd);
+		base_energy_pd = compute_energy(p, -1, cpus, pd);
 		base_energy += base_energy_pd;
 
 		/* Evaluate the energy impact of using prev_cpu. */
 		if (compute_prev_delta) {
-			prev_delta = compute_energy(p, prev_cpu, pd);
+			prev_delta = compute_energy(p, prev_cpu, cpus, pd);
 			if (prev_delta < base_energy_pd)
 				goto unlock;
 			prev_delta -= base_energy_pd;
@@ -6890,7 +6893,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 
 		/* Evaluate the energy impact of using max_spare_cap_cpu. */
 		if (max_spare_cap_cpu >= 0) {
-			cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
+			cur_delta = compute_energy(p, max_spare_cap_cpu, cpus,
+						   pd);
 			if (cur_delta < base_energy_pd)
 				goto unlock;
 			cur_delta -= base_energy_pd;
-- 
2.36.1.124.g0e6072fb45-goog
Re: [PATCH v9 5/7] sched/fair: Use the same cpumask per-PD throughout find_energy_efficient_cpu()
Posted by Vincent Guittot 3 years, 8 months ago
On Mon, 23 May 2022 at 17:52, Vincent Donnefort <vdonnefort@google.com> wrote:
>
> From: Dietmar Eggemann <dietmar.eggemann@arm.com>
>
> The Perf Domain (PD) cpumask (struct em_perf_domain.cpus) stays
> invariant after Energy Model creation, i.e. it is not updated after
> CPU hotplug operations.
>
> That's why the PD mask is used in conjunction with the cpu_online_mask
> (or Sched Domain cpumask). Thereby the cpu_online_mask is fetched
> multiple times (in compute_energy()) during a run-queue selection
> for a task.
>
> cpu_online_mask may change during this time which can lead to wrong
> energy calculations.
>
> To be able to avoid this, use the select_rq_mask per-cpu cpumask to
> create a cpumask out of PD cpumask and cpu_online_mask and pass it
> through the function calls of the EAS run-queue selection path.
>
> The PD cpumask for max_spare_cap_cpu/compute_prev_delta selection
> (find_energy_efficient_cpu()) is now ANDed not only with the SD mask
> but also with the cpu_online_mask. This is fine since this cpumask
> has to be in sync with the one used for energy computation
> (compute_energy()).
> An exclusive cpuset setup with at least one asymmetric CPU capacity
> island (hence the additional AND with the SD cpumask) is the obvious
> exception here.
>
> Signed-off-by: Dietmar Eggemann <dietmar.eggemann@arm.com>

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>

>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2d7bba2f1da2..57074f27c0d2 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6700,14 +6700,14 @@ static unsigned long cpu_util_without(int cpu, struct task_struct *p)
>   * task.
>   */
>  static long
> -compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
> +compute_energy(struct task_struct *p, int dst_cpu, struct cpumask *cpus,
> +              struct perf_domain *pd)
>  {
> -       struct cpumask *pd_mask = perf_domain_span(pd);
>         unsigned long max_util = 0, sum_util = 0, cpu_cap;
>         int cpu;
>
> -       cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
> -       cpu_cap -= arch_scale_thermal_pressure(cpumask_first(pd_mask));
> +       cpu_cap = arch_scale_cpu_capacity(cpumask_first(cpus));
> +       cpu_cap -= arch_scale_thermal_pressure(cpumask_first(cpus));
>
>         /*
>          * The capacity state of CPUs of the current rd can be driven by CPUs
> @@ -6718,7 +6718,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
>          * If an entire pd is outside of the current rd, it will not appear in
>          * its pd list and will not be accounted by compute_energy().
>          */
> -       for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
> +       for_each_cpu(cpu, cpus) {
>                 unsigned long util_freq = cpu_util_next(cpu, p, dst_cpu);
>                 unsigned long cpu_util, util_running = util_freq;
>                 struct task_struct *tsk = NULL;
> @@ -6805,6 +6805,7 @@ compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
>   */
>  static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>  {
> +       struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
>         unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
>         struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
>         int cpu, best_energy_cpu = prev_cpu, target = -1;
> @@ -6839,7 +6840,9 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>                 unsigned long base_energy_pd;
>                 int max_spare_cap_cpu = -1;
>
> -               for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
> +               cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask);
> +
> +               for_each_cpu_and(cpu, cpus, sched_domain_span(sd)) {
>                         if (!cpumask_test_cpu(cpu, p->cpus_ptr))
>                                 continue;
>
> @@ -6876,12 +6879,12 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>                         continue;
>
>                 /* Compute the 'base' energy of the pd, without @p */
> -               base_energy_pd = compute_energy(p, -1, pd);
> +               base_energy_pd = compute_energy(p, -1, cpus, pd);
>                 base_energy += base_energy_pd;
>
>                 /* Evaluate the energy impact of using prev_cpu. */
>                 if (compute_prev_delta) {
> -                       prev_delta = compute_energy(p, prev_cpu, pd);
> +                       prev_delta = compute_energy(p, prev_cpu, cpus, pd);
>                         if (prev_delta < base_energy_pd)
>                                 goto unlock;
>                         prev_delta -= base_energy_pd;
> @@ -6890,7 +6893,8 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>
>                 /* Evaluate the energy impact of using max_spare_cap_cpu. */
>                 if (max_spare_cap_cpu >= 0) {
> -                       cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
> +                       cur_delta = compute_energy(p, max_spare_cap_cpu, cpus,
> +                                                  pd);
>                         if (cur_delta < base_energy_pd)
>                                 goto unlock;
>                         cur_delta -= base_energy_pd;
> --
> 2.36.1.124.g0e6072fb45-goog
>