[PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop

Qiliang Yuan posted 1 patch 2 weeks, 2 days ago
There is a newer version of this series
kernel/sched/fair.c     | 46 ++++++++++++++++++++++++-----------------
kernel/sched/sched.h    |  4 ++++
kernel/sched/topology.c |  1 +
3 files changed, 32 insertions(+), 19 deletions(-)
[PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
Posted by Qiliang Yuan 2 weeks, 2 days ago
By pre-calculating the base max utilization of each performance domain
at the start of find_energy_efficient_cpu(), we can avoid the repetitive
O(M) scan in eenv_pd_max_util() for every candidate CPU.

This reduces the complexity of energy calculation from O(P*N) to O(N + P^2),
where P is the number of performance domains. For systems with many
performance domains or high core counts, this results in significant
reduction in wakeup latency.

Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
---
 kernel/sched/fair.c     | 46 ++++++++++++++++++++++++-----------------
 kernel/sched/sched.h    |  4 ++++
 kernel/sched/topology.c |  1 +
 3 files changed, 32 insertions(+), 19 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 458324d240e9..de5bfdfe553f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8236,41 +8236,32 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv,
  * exceed @eenv->cpu_cap.
  */
 static inline unsigned long
-eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
+eenv_pd_max_util(struct energy_env *eenv, struct perf_domain *pd,
 		 struct task_struct *p, int dst_cpu)
 {
-	unsigned long max_util = 0;
-	int cpu;
+	unsigned long max_util = pd->max_util;
 
-	for_each_cpu(cpu, pd_cpus) {
-		struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
-		unsigned long util = cpu_util(cpu, p, dst_cpu, 1);
+	if (dst_cpu >= 0 && cpumask_test_cpu(dst_cpu, perf_domain_span(pd))) {
+		unsigned long util = cpu_util(dst_cpu, p, dst_cpu, 1);
 		unsigned long eff_util, min, max;
 
-		/*
-		 * Performance domain frequency: utilization clamping
-		 * must be considered since it affects the selection
-		 * of the performance domain frequency.
-		 * NOTE: in case RT tasks are running, by default the min
-		 * utilization can be max OPP.
-		 */
-		eff_util = effective_cpu_util(cpu, util, &min, &max);
+		eff_util = effective_cpu_util(dst_cpu, util, &min, &max);
 
 		/* Task's uclamp can modify min and max value */
-		if (tsk && uclamp_is_used()) {
+		if (uclamp_is_used()) {
 			min = max(min, uclamp_eff_value(p, UCLAMP_MIN));
 
 			/*
 			 * If there is no active max uclamp constraint,
 			 * directly use task's one, otherwise keep max.
 			 */
-			if (uclamp_rq_is_idle(cpu_rq(cpu)))
+			if (uclamp_rq_is_idle(cpu_rq(dst_cpu)))
 				max = uclamp_eff_value(p, UCLAMP_MAX);
 			else
 				max = max(max, uclamp_eff_value(p, UCLAMP_MAX));
 		}
 
-		eff_util = sugov_effective_cpu_perf(cpu, eff_util, min, max);
+		eff_util = sugov_effective_cpu_perf(dst_cpu, eff_util, min, max);
 		max_util = max(max_util, eff_util);
 	}
 
@@ -8286,7 +8277,7 @@ static inline unsigned long
 compute_energy(struct energy_env *eenv, struct perf_domain *pd,
 	       struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
 {
-	unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
+	unsigned long max_util = eenv_pd_max_util(eenv, pd, p, dst_cpu);
 	unsigned long busy_time = eenv->pd_busy_time;
 	unsigned long energy;
 
@@ -8351,7 +8342,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 	unsigned long best_actual_cap = 0;
 	unsigned long prev_actual_cap = 0;
 	struct sched_domain *sd;
-	struct perf_domain *pd;
+	struct perf_domain *pd, *tmp_pd;
 	struct energy_env eenv;
 
 	rcu_read_lock();
@@ -8377,6 +8368,23 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 
 	eenv_task_busy_time(&eenv, p, prev_cpu);
 
+	/* Algorithmic Optimization: Pre-calculate max_util for O(1) energy estimation */
+	for (tmp_pd = pd; tmp_pd; tmp_pd = tmp_pd->next) {
+		unsigned long max_u = 0;
+		int i;
+
+		for_each_cpu(i, perf_domain_span(tmp_pd)) {
+			unsigned long util = cpu_util(i, p, -1, 1);
+			unsigned long eff_util, min, max;
+
+			eff_util = effective_cpu_util(i, util, &min, &max);
+			eff_util = sugov_effective_cpu_perf(i, eff_util, min, max);
+			if (eff_util > max_u)
+				max_u = eff_util;
+		}
+		tmp_pd->max_util = max_u;
+	}
+
 	for (; pd; pd = pd->next) {
 		unsigned long util_min = p_util_min, util_max = p_util_max;
 		unsigned long cpu_cap, cpu_actual_cap, util;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index d30cca6870f5..f308d335ca77 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -972,6 +972,10 @@ struct perf_domain {
 	struct em_perf_domain *em_pd;
 	struct perf_domain *next;
 	struct rcu_head rcu;
+
+	/* O(1) optimization hints */
+	unsigned long	   max_util;
+	int		   max_spare_cap_cpu;
 };
 
 /*
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 5a3f29a26bdb..b9de022ddb53 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -354,6 +354,7 @@ static struct perf_domain *pd_init(int cpu)
 	if (!pd)
 		return NULL;
 	pd->em_pd = obj;
+	pd->max_spare_cap_cpu = -1;
 
 	return pd;
 }
-- 
2.51.0
Re: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
Posted by kernel test robot 2 weeks, 1 day ago
Hi Qiliang,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/sched/core]
[also build test WARNING on tip/master peterz-queue/sched/core linus/master v6.16-rc1 next-20260122]
[cannot apply to tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Qiliang-Yuan/sched-fair-Optimize-EAS-energy-calculation-complexity-from-O-N-to-O-1-inside-inner-loop/20260123-000924
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20260122154227.130767-1-realwujing%40gmail.com
patch subject: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
config: s390-allnoconfig-bpf (https://download.01.org/0day-ci/archive/20260123/202601231209.7Aqyec0v-lkp@intel.com/config)
compiler: s390-linux-gcc (GCC) 15.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260123/202601231209.7Aqyec0v-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601231209.7Aqyec0v-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from ./include/linux/bitmap.h:11,
                    from ./include/linux/cpumask.h:11,
                    from ./include/linux/energy_model.h:4,
                    from kernel/sched/fair.c:23:
   kernel/sched/fair.c: In function 'find_energy_efficient_cpu':
   ./include/linux/cpumask_types.h:18:37: warning: dereferencing 'void *' pointer
      18 | #define cpumask_bits(maskp) ((maskp)->bits)
         |                                     ^~
   ./include/linux/find.h:586:48: note: in definition of macro 'for_each_set_bit'
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                ^~~~
   ./include/linux/cpumask.h:380:31: note: in expansion of macro 'cpumask_bits'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |                               ^~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~
   ./include/linux/cpumask_types.h:18:37: error: request for member 'bits' in something not a structure or union
      18 | #define cpumask_bits(maskp) ((maskp)->bits)
         |                                     ^~
   ./include/linux/find.h:586:48: note: in definition of macro 'for_each_set_bit'
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                ^~~~
   ./include/linux/cpumask.h:380:31: note: in expansion of macro 'cpumask_bits'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |                               ^~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~
>> ./include/linux/find.h:586:69: warning: left-hand operand of comma expression has no effect [-Wunused-value]
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                                     ^
   ./include/linux/cpumask.h:380:9: note: in expansion of macro 'for_each_set_bit'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |         ^~~~~~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~


vim +586 ./include/linux/find.h

6b8ecb84f8f640 include/asm-generic/bitops/find.h Yury Norov 2021-08-14  584  
bc9d6635c293a2 include/linux/find.h              Yury Norov 2021-08-14  585  #define for_each_set_bit(bit, addr, size) \
fdae96a3fc7f70 include/linux/find.h              Yury Norov 2022-09-19 @586  	for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
bc9d6635c293a2 include/linux/find.h              Yury Norov 2021-08-14  587  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
Posted by kernel test robot 2 weeks, 2 days ago
Hi Qiliang,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master peterz-queue/sched/core linus/master v6.19-rc6 next-20260122]
[cannot apply to tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Qiliang-Yuan/sched-fair-Optimize-EAS-energy-calculation-complexity-from-O-N-to-O-1-inside-inner-loop/20260123-000924
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20260122154227.130767-1-realwujing%40gmail.com
patch subject: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
config: sh-defconfig (https://download.01.org/0day-ci/archive/20260123/202601230700.0p7YsNA0-lkp@intel.com/config)
compiler: sh4-linux-gcc (GCC) 15.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260123/202601230700.0p7YsNA0-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601230700.0p7YsNA0-lkp@intel.com/

All errors (new ones prefixed by >>):

   In file included from include/linux/bitmap.h:11,
                    from include/linux/cpumask.h:11,
                    from include/linux/energy_model.h:4,
                    from kernel/sched/fair.c:23:
   kernel/sched/fair.c: In function 'find_energy_efficient_cpu':
   include/linux/cpumask_types.h:18:37: warning: dereferencing 'void *' pointer
      18 | #define cpumask_bits(maskp) ((maskp)->bits)
         |                                     ^~
   include/linux/find.h:586:48: note: in definition of macro 'for_each_set_bit'
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                ^~~~
   include/linux/cpumask.h:380:31: note: in expansion of macro 'cpumask_bits'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |                               ^~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~
>> include/linux/cpumask_types.h:18:37: error: request for member 'bits' in something not a structure or union
      18 | #define cpumask_bits(maskp) ((maskp)->bits)
         |                                     ^~
   include/linux/find.h:586:48: note: in definition of macro 'for_each_set_bit'
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                ^~~~
   include/linux/cpumask.h:380:31: note: in expansion of macro 'cpumask_bits'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |                               ^~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~
   include/linux/find.h:586:69: warning: left-hand operand of comma expression has no effect [-Wunused-value]
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                                     ^
   include/linux/cpumask.h:380:9: note: in expansion of macro 'for_each_set_bit'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |         ^~~~~~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~


vim +/bits +18 include/linux/cpumask_types.h

eb4faa36d674ee Yury Norov 2024-05-27  10  
eb4faa36d674ee Yury Norov 2024-05-27  11  /**
eb4faa36d674ee Yury Norov 2024-05-27  12   * cpumask_bits - get the bits in a cpumask
eb4faa36d674ee Yury Norov 2024-05-27  13   * @maskp: the struct cpumask *
eb4faa36d674ee Yury Norov 2024-05-27  14   *
eb4faa36d674ee Yury Norov 2024-05-27  15   * You should only assume nr_cpu_ids bits of this mask are valid.  This is
eb4faa36d674ee Yury Norov 2024-05-27  16   * a macro so it's const-correct.
eb4faa36d674ee Yury Norov 2024-05-27  17   */
eb4faa36d674ee Yury Norov 2024-05-27 @18  #define cpumask_bits(maskp) ((maskp)->bits)
eb4faa36d674ee Yury Norov 2024-05-27  19  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
Posted by kernel test robot 2 weeks, 2 days ago
Hi Qiliang,

kernel test robot noticed the following build errors:

[auto build test ERROR on tip/sched/core]
[also build test ERROR on tip/master peterz-queue/sched/core linus/master v6.19-rc6 next-20260122]
[cannot apply to tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Qiliang-Yuan/sched-fair-Optimize-EAS-energy-calculation-complexity-from-O-N-to-O-1-inside-inner-loop/20260123-000924
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20260122154227.130767-1-realwujing%40gmail.com
patch subject: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
config: x86_64-randconfig-161-20260123 (https://download.01.org/0day-ci/archive/20260123/202601230609.zsX7Ojfn-lkp@intel.com/config)
compiler: clang version 20.1.8 (https://github.com/llvm/llvm-project 87f0227cb60147a26a1eeb4fb06e3b505e9c7261)
smatch version: v0.5.0-8994-gd50c5a4c
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260123/202601230609.zsX7Ojfn-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601230609.zsX7Ojfn-lkp@intel.com/

All errors (new ones prefixed by >>):

>> kernel/sched/fair.c:8345:3: error: member reference base type 'void' is not a structure or union
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/cpumask.h:380:24: note: expanded from macro 'for_each_cpu'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |         ~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   include/linux/cpumask_types.h:18:37: note: expanded from macro 'cpumask_bits'
      18 | #define cpumask_bits(maskp) ((maskp)->bits)
         |                                     ^ ~~~~
   include/linux/find.h:586:41: note: expanded from macro 'for_each_set_bit'
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                ^~~~
   1 error generated.


vim +/void +8345 kernel/sched/fair.c

  8262	
  8263	/*
  8264	 * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
  8265	 * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
  8266	 * spare capacity in each performance domain and uses it as a potential
  8267	 * candidate to execute the task. Then, it uses the Energy Model to figure
  8268	 * out which of the CPU candidates is the most energy-efficient.
  8269	 *
  8270	 * The rationale for this heuristic is as follows. In a performance domain,
  8271	 * all the most energy efficient CPU candidates (according to the Energy
  8272	 * Model) are those for which we'll request a low frequency. When there are
  8273	 * several CPUs for which the frequency request will be the same, we don't
  8274	 * have enough data to break the tie between them, because the Energy Model
  8275	 * only includes active power costs. With this model, if we assume that
  8276	 * frequency requests follow utilization (e.g. using schedutil), the CPU with
  8277	 * the maximum spare capacity in a performance domain is guaranteed to be among
  8278	 * the best candidates of the performance domain.
  8279	 *
  8280	 * In practice, it could be preferable from an energy standpoint to pack
  8281	 * small tasks on a CPU in order to let other CPUs go in deeper idle states,
  8282	 * but that could also hurt our chances to go cluster idle, and we have no
  8283	 * ways to tell with the current Energy Model if this is actually a good
  8284	 * idea or not. So, find_energy_efficient_cpu() basically favors
  8285	 * cluster-packing, and spreading inside a cluster. That should at least be
  8286	 * a good thing for latency, and this is consistent with the idea that most
  8287	 * of the energy savings of EAS come from the asymmetry of the system, and
  8288	 * not so much from breaking the tie between identical CPUs. That's also the
  8289	 * reason why EAS is enabled in the topology code only for systems where
  8290	 * SD_ASYM_CPUCAPACITY is set.
  8291	 *
  8292	 * NOTE: Forkees are not accepted in the energy-aware wake-up path because
  8293	 * they don't have any useful utilization data yet and it's not possible to
  8294	 * forecast their impact on energy consumption. Consequently, they will be
  8295	 * placed by sched_balance_find_dst_cpu() on the least loaded CPU, which might turn out
  8296	 * to be energy-inefficient in some use-cases. The alternative would be to
  8297	 * bias new tasks towards specific types of CPUs first, or to try to infer
  8298	 * their util_avg from the parent task, but those heuristics could hurt
  8299	 * other use-cases too. So, until someone finds a better way to solve this,
  8300	 * let's keep things simple by re-using the existing slow path.
  8301	 */
  8302	static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
  8303	{
  8304		struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_rq_mask);
  8305		unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
  8306		unsigned long p_util_min = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MIN) : 0;
  8307		unsigned long p_util_max = uclamp_is_used() ? uclamp_eff_value(p, UCLAMP_MAX) : 1024;
  8308		struct root_domain *rd = this_rq()->rd;
  8309		int cpu, best_energy_cpu, target = -1;
  8310		int prev_fits = -1, best_fits = -1;
  8311		unsigned long best_actual_cap = 0;
  8312		unsigned long prev_actual_cap = 0;
  8313		struct sched_domain *sd;
  8314		struct perf_domain *pd, *tmp_pd;
  8315		struct energy_env eenv;
  8316	
  8317		rcu_read_lock();
  8318		pd = rcu_dereference_all(rd->pd);
  8319		if (!pd)
  8320			goto unlock;
  8321	
  8322		/*
  8323		 * Energy-aware wake-up happens on the lowest sched_domain starting
  8324		 * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
  8325		 */
  8326		sd = rcu_dereference_all(*this_cpu_ptr(&sd_asym_cpucapacity));
  8327		while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
  8328			sd = sd->parent;
  8329		if (!sd)
  8330			goto unlock;
  8331	
  8332		target = prev_cpu;
  8333	
  8334		sync_entity_load_avg(&p->se);
  8335		if (!task_util_est(p) && p_util_min == 0)
  8336			goto unlock;
  8337	
  8338		eenv_task_busy_time(&eenv, p, prev_cpu);
  8339	
  8340		/* Algorithmic Optimization: Pre-calculate max_util for O(1) energy estimation */
  8341		for (tmp_pd = pd; tmp_pd; tmp_pd = tmp_pd->next) {
  8342			unsigned long max_u = 0;
  8343			int i;
  8344	
> 8345			for_each_cpu(i, perf_domain_span(tmp_pd)) {
  8346				unsigned long util = cpu_util(i, p, -1, 1);
  8347				unsigned long eff_util, min, max;
  8348	
  8349				eff_util = effective_cpu_util(i, util, &min, &max);
  8350				eff_util = sugov_effective_cpu_perf(i, eff_util, min, max);
  8351				if (eff_util > max_u)
  8352					max_u = eff_util;
  8353			}
  8354			tmp_pd->max_util = max_u;
  8355		}
  8356	
  8357		for (; pd; pd = pd->next) {
  8358			unsigned long util_min = p_util_min, util_max = p_util_max;
  8359			unsigned long cpu_cap, cpu_actual_cap, util;
  8360			long prev_spare_cap = -1, max_spare_cap = -1;
  8361			unsigned long rq_util_min, rq_util_max;
  8362			unsigned long cur_delta, base_energy;
  8363			int max_spare_cap_cpu = -1;
  8364			int fits, max_fits = -1;
  8365	
  8366			if (!cpumask_and(cpus, perf_domain_span(pd), cpu_online_mask))
  8367				continue;
  8368	
  8369			/* Account external pressure for the energy estimation */
  8370			cpu = cpumask_first(cpus);
  8371			cpu_actual_cap = get_actual_cpu_capacity(cpu);
  8372	
  8373			eenv.cpu_cap = cpu_actual_cap;
  8374			eenv.pd_cap = 0;
  8375	
  8376			for_each_cpu(cpu, cpus) {
  8377				struct rq *rq = cpu_rq(cpu);
  8378	
  8379				eenv.pd_cap += cpu_actual_cap;
  8380	
  8381				if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
  8382					continue;
  8383	
  8384				if (!cpumask_test_cpu(cpu, p->cpus_ptr))
  8385					continue;
  8386	
  8387				util = cpu_util(cpu, p, cpu, 0);
  8388				cpu_cap = capacity_of(cpu);
  8389	
  8390				/*
  8391				 * Skip CPUs that cannot satisfy the capacity request.
  8392				 * IOW, placing the task there would make the CPU
  8393				 * overutilized. Take uclamp into account to see how
  8394				 * much capacity we can get out of the CPU; this is
  8395				 * aligned with sched_cpu_util().
  8396				 */
  8397				if (uclamp_is_used() && !uclamp_rq_is_idle(rq)) {
  8398					/*
  8399					 * Open code uclamp_rq_util_with() except for
  8400					 * the clamp() part. I.e.: apply max aggregation
  8401					 * only. util_fits_cpu() logic requires to
  8402					 * operate on non clamped util but must use the
  8403					 * max-aggregated uclamp_{min, max}.
  8404					 */
  8405					rq_util_min = uclamp_rq_get(rq, UCLAMP_MIN);
  8406					rq_util_max = uclamp_rq_get(rq, UCLAMP_MAX);
  8407	
  8408					util_min = max(rq_util_min, p_util_min);
  8409					util_max = max(rq_util_max, p_util_max);
  8410				}
  8411	
  8412				fits = util_fits_cpu(util, util_min, util_max, cpu);
  8413				if (!fits)
  8414					continue;
  8415	
  8416				lsub_positive(&cpu_cap, util);
  8417	
  8418				if (cpu == prev_cpu) {
  8419					/* Always use prev_cpu as a candidate. */
  8420					prev_spare_cap = cpu_cap;
  8421					prev_fits = fits;
  8422				} else if ((fits > max_fits) ||
  8423					   ((fits == max_fits) && ((long)cpu_cap > max_spare_cap))) {
  8424					/*
  8425					 * Find the CPU with the maximum spare capacity
  8426					 * among the remaining CPUs in the performance
  8427					 * domain.
  8428					 */
  8429					max_spare_cap = cpu_cap;
  8430					max_spare_cap_cpu = cpu;
  8431					max_fits = fits;
  8432				}
  8433			}
  8434	
  8435			if (max_spare_cap_cpu < 0 && prev_spare_cap < 0)
  8436				continue;
  8437	
  8438			eenv_pd_busy_time(&eenv, cpus, p);
  8439			/* Compute the 'base' energy of the pd, without @p */
  8440			base_energy = compute_energy(&eenv, pd, cpus, p, -1);
  8441	
  8442			/* Evaluate the energy impact of using prev_cpu. */
  8443			if (prev_spare_cap > -1) {
  8444				prev_delta = compute_energy(&eenv, pd, cpus, p,
  8445							    prev_cpu);
  8446				/* CPU utilization has changed */
  8447				if (prev_delta < base_energy)
  8448					goto unlock;
  8449				prev_delta -= base_energy;
  8450				prev_actual_cap = cpu_actual_cap;
  8451				best_delta = min(best_delta, prev_delta);
  8452			}
  8453	
  8454			/* Evaluate the energy impact of using max_spare_cap_cpu. */
  8455			if (max_spare_cap_cpu >= 0 && max_spare_cap > prev_spare_cap) {
  8456				/* Current best energy cpu fits better */
  8457				if (max_fits < best_fits)
  8458					continue;
  8459	
  8460				/*
  8461				 * Both don't fit performance hint (i.e. uclamp_min)
  8462				 * but best energy cpu has better capacity.
  8463				 */
  8464				if ((max_fits < 0) &&
  8465				    (cpu_actual_cap <= best_actual_cap))
  8466					continue;
  8467	
  8468				cur_delta = compute_energy(&eenv, pd, cpus, p,
  8469							   max_spare_cap_cpu);
  8470				/* CPU utilization has changed */
  8471				if (cur_delta < base_energy)
  8472					goto unlock;
  8473				cur_delta -= base_energy;
  8474	
  8475				/*
  8476				 * Both fit for the task but best energy cpu has lower
  8477				 * energy impact.
  8478				 */
  8479				if ((max_fits > 0) && (best_fits > 0) &&
  8480				    (cur_delta >= best_delta))
  8481					continue;
  8482	
  8483				best_delta = cur_delta;
  8484				best_energy_cpu = max_spare_cap_cpu;
  8485				best_fits = max_fits;
  8486				best_actual_cap = cpu_actual_cap;
  8487			}
  8488		}
  8489		rcu_read_unlock();
  8490	
  8491		if ((best_fits > prev_fits) ||
  8492		    ((best_fits > 0) && (best_delta < prev_delta)) ||
  8493		    ((best_fits < 0) && (best_actual_cap > prev_actual_cap)))
  8494			target = best_energy_cpu;
  8495	
  8496		return target;
  8497	
  8498	unlock:
  8499		rcu_read_unlock();
  8500	
  8501		return target;
  8502	}
  8503	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
Posted by kernel test robot 2 weeks, 2 days ago
Hi Qiliang,

kernel test robot noticed the following build warnings:

[auto build test WARNING on tip/sched/core]
[also build test WARNING on tip/master peterz-queue/sched/core linus/master v6.19-rc6 next-20260122]
[cannot apply to tip/auto-latest]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Qiliang-Yuan/sched-fair-Optimize-EAS-energy-calculation-complexity-from-O-N-to-O-1-inside-inner-loop/20260123-000924
base:   tip/sched/core
patch link:    https://lore.kernel.org/r/20260122154227.130767-1-realwujing%40gmail.com
patch subject: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
config: sh-defconfig (https://download.01.org/0day-ci/archive/20260123/202601230422.1xw7nG4T-lkp@intel.com/config)
compiler: sh4-linux-gcc (GCC) 15.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260123/202601230422.1xw7nG4T-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202601230422.1xw7nG4T-lkp@intel.com/

All warnings (new ones prefixed by >>):

   In file included from include/linux/bitmap.h:11,
                    from include/linux/cpumask.h:11,
                    from include/linux/energy_model.h:4,
                    from kernel/sched/fair.c:23:
   kernel/sched/fair.c: In function 'find_energy_efficient_cpu':
>> include/linux/cpumask_types.h:18:37: warning: dereferencing 'void *' pointer
      18 | #define cpumask_bits(maskp) ((maskp)->bits)
         |                                     ^~
   include/linux/find.h:586:48: note: in definition of macro 'for_each_set_bit'
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                ^~~~
   include/linux/cpumask.h:380:31: note: in expansion of macro 'cpumask_bits'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |                               ^~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~
   include/linux/cpumask_types.h:18:37: error: request for member 'bits' in something not a structure or union
      18 | #define cpumask_bits(maskp) ((maskp)->bits)
         |                                     ^~
   include/linux/find.h:586:48: note: in definition of macro 'for_each_set_bit'
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                ^~~~
   include/linux/cpumask.h:380:31: note: in expansion of macro 'cpumask_bits'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |                               ^~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~
>> include/linux/find.h:586:69: warning: left-hand operand of comma expression has no effect [-Wunused-value]
     586 |         for ((bit) = 0; (bit) = find_next_bit((addr), (size), (bit)), (bit) < (size); (bit)++)
         |                                                                     ^
   include/linux/cpumask.h:380:9: note: in expansion of macro 'for_each_set_bit'
     380 |         for_each_set_bit(cpu, cpumask_bits(mask), small_cpumask_bits)
         |         ^~~~~~~~~~~~~~~~
   kernel/sched/fair.c:8345:17: note: in expansion of macro 'for_each_cpu'
    8345 |                 for_each_cpu(i, perf_domain_span(tmp_pd)) {
         |                 ^~~~~~~~~~~~


vim +18 include/linux/cpumask_types.h

eb4faa36d674ee Yury Norov 2024-05-27  10  
eb4faa36d674ee Yury Norov 2024-05-27  11  /**
eb4faa36d674ee Yury Norov 2024-05-27  12   * cpumask_bits - get the bits in a cpumask
eb4faa36d674ee Yury Norov 2024-05-27  13   * @maskp: the struct cpumask *
eb4faa36d674ee Yury Norov 2024-05-27  14   *
eb4faa36d674ee Yury Norov 2024-05-27  15   * You should only assume nr_cpu_ids bits of this mask are valid.  This is
eb4faa36d674ee Yury Norov 2024-05-27  16   * a macro so it's const-correct.
eb4faa36d674ee Yury Norov 2024-05-27  17   */
eb4faa36d674ee Yury Norov 2024-05-27 @18  #define cpumask_bits(maskp) ((maskp)->bits)
eb4faa36d674ee Yury Norov 2024-05-27  19  

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
Posted by Christian Loehle 2 weeks, 2 days ago
On 1/22/26 15:42, Qiliang Yuan wrote:
> By pre-calculating the base max utilization of each performance domain
> at the start of find_energy_efficient_cpu(), we can avoid the repetitive
> O(M) scan in eenv_pd_max_util() for every candidate CPU.
> 
> This reduces the complexity of energy calculation from O(P*N) to O(N + P^2),
> where P is the number of performance domains. For systems with many
> performance domains or high core counts, this results in significant
> reduction in wakeup latency.

I don't think these are correct, but also

> 
> Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
> Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
> ---
>  kernel/sched/fair.c     | 46 ++++++++++++++++++++++++-----------------
>  kernel/sched/sched.h    |  4 ++++
>  kernel/sched/topology.c |  1 +
>  3 files changed, 32 insertions(+), 19 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 458324d240e9..de5bfdfe553f 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8236,41 +8236,32 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv,
>   * exceed @eenv->cpu_cap.
>   */
>  static inline unsigned long
> -eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
> +eenv_pd_max_util(struct energy_env *eenv, struct perf_domain *pd,
>  		 struct task_struct *p, int dst_cpu)
>  {
> -	unsigned long max_util = 0;
> -	int cpu;
> +	unsigned long max_util = pd->max_util;
>  
> -	for_each_cpu(cpu, pd_cpus) {
> -		struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
> -		unsigned long util = cpu_util(cpu, p, dst_cpu, 1);
> +	if (dst_cpu >= 0 && cpumask_test_cpu(dst_cpu, perf_domain_span(pd))) {
> +		unsigned long util = cpu_util(dst_cpu, p, dst_cpu, 1);
>  		unsigned long eff_util, min, max;
>  
> -		/*
> -		 * Performance domain frequency: utilization clamping
> -		 * must be considered since it affects the selection
> -		 * of the performance domain frequency.
> -		 * NOTE: in case RT tasks are running, by default the min
> -		 * utilization can be max OPP.
> -		 */
> -		eff_util = effective_cpu_util(cpu, util, &min, &max);
> +		eff_util = effective_cpu_util(dst_cpu, util, &min, &max);
>  
>  		/* Task's uclamp can modify min and max value */
> -		if (tsk && uclamp_is_used()) {
> +		if (uclamp_is_used()) {
>  			min = max(min, uclamp_eff_value(p, UCLAMP_MIN));
>  
>  			/*
>  			 * If there is no active max uclamp constraint,
>  			 * directly use task's one, otherwise keep max.
>  			 */
> -			if (uclamp_rq_is_idle(cpu_rq(cpu)))
> +			if (uclamp_rq_is_idle(cpu_rq(dst_cpu)))
>  				max = uclamp_eff_value(p, UCLAMP_MAX);
>  			else
>  				max = max(max, uclamp_eff_value(p, UCLAMP_MAX));
>  		}
>  
> -		eff_util = sugov_effective_cpu_perf(cpu, eff_util, min, max);
> +		eff_util = sugov_effective_cpu_perf(dst_cpu, eff_util, min, max);
>  		max_util = max(max_util, eff_util);
>  	}
>  
> @@ -8286,7 +8277,7 @@ static inline unsigned long
>  compute_energy(struct energy_env *eenv, struct perf_domain *pd,
>  	       struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
>  {
> -	unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
> +	unsigned long max_util = eenv_pd_max_util(eenv, pd, p, dst_cpu);
>  	unsigned long busy_time = eenv->pd_busy_time;
>  	unsigned long energy;
>  
> @@ -8351,7 +8342,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>  	unsigned long best_actual_cap = 0;
>  	unsigned long prev_actual_cap = 0;
>  	struct sched_domain *sd;
> -	struct perf_domain *pd;
> +	struct perf_domain *pd, *tmp_pd;
>  	struct energy_env eenv;
>  
>  	rcu_read_lock();
> @@ -8377,6 +8368,23 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
>  
>  	eenv_task_busy_time(&eenv, p, prev_cpu);
>  
> +	/* Algorithmic Optimization: Pre-calculate max_util for O(1) energy estimation */
> +	for (tmp_pd = pd; tmp_pd; tmp_pd = tmp_pd->next) {
> +		unsigned long max_u = 0;
> +		int i;
> +
> +		for_each_cpu(i, perf_domain_span(tmp_pd)) {
> +			unsigned long util = cpu_util(i, p, -1, 1);
> +			unsigned long eff_util, min, max;
> +
> +			eff_util = effective_cpu_util(i, util, &min, &max);
> +			eff_util = sugov_effective_cpu_perf(i, eff_util, min, max);
> +			if (eff_util > max_u)
> +				max_u = eff_util;
> +		}
> +		tmp_pd->max_util = max_u;

You can't do this as there's no synchronisation for perf_domain apart from
the rcu_read_lock() here.

> +	}
> +
>  	for (; pd; pd = pd->next) {
>  		unsigned long util_min = p_util_min, util_max = p_util_max;
>  		unsigned long cpu_cap, cpu_actual_cap, util;
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index d30cca6870f5..f308d335ca77 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -972,6 +972,10 @@ struct perf_domain {
>  	struct em_perf_domain *em_pd;
>  	struct perf_domain *next;
>  	struct rcu_head rcu;
> +
> +	/* O(1) optimization hints */
> +	unsigned long	   max_util;
> +	int		   max_spare_cap_cpu;
>  };
>  
>  /*
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 5a3f29a26bdb..b9de022ddb53 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -354,6 +354,7 @@ static struct perf_domain *pd_init(int cpu)
>  	if (!pd)
>  		return NULL;
>  	pd->em_pd = obj;
> +	pd->max_spare_cap_cpu = -1;
>  
>  	return pd;
>  }
[PATCH v2] sched/fair: Optimize EAS energy calculation complexity from O(N) to O(1) inside inner loop
Posted by Qiliang Yuan 2 weeks, 1 day ago
Pre-calculate the base maximum utilization of each performance domain during the
main loop of find_energy_efficient_cpu() and cache it in the local
'energy_env' structure.

By caching this base value, the maximum utilization for candidate CPU
placements (such as prev_cpu and max_spare_cap_cpu) can be determined in
O(1) time, eliminating redundant scans of the performance domain. This
optimizes the energy estimation path by reducing the number of scans per
performance domain from three to one.

This change significantly reduces wake-up latency on systems with high core
counts or complex performance domain topologies by minimizing the overall
complexity of the Energy-Aware Scheduling (EAS) calculation.

Signed-off-by: Qiliang Yuan <yuanql9@chinatelecom.cn>
Signed-off-by: Qiliang Yuan <realwujing@gmail.com>
---
v2:
 - Ensure RCU safety by using local 'energy_env' for caching instead of
   modifying the shared 'perf_domain' structure.
 - Consolidate pre-calculation into the main loop to avoid an extra pass
   over the performance domains.
v1:
 - Optimize energy calculation by pre-calculating performance domain max utilization.
 - Add max_util and max_spare_cap_cpu to struct perf_domain.
 - Reduce inner loop complexity from O(N) to O(1) for energy estimation.

 kernel/sched/fair.c | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e71302282671..5c114c49c202 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8148,6 +8148,7 @@ struct energy_env {
 	unsigned long pd_busy_time;
 	unsigned long cpu_cap;
 	unsigned long pd_cap;
+	unsigned long pd_max_util;
 };
 
 /*
@@ -8215,41 +8216,32 @@ static inline void eenv_pd_busy_time(struct energy_env *eenv,
  * exceed @eenv->cpu_cap.
  */
 static inline unsigned long
-eenv_pd_max_util(struct energy_env *eenv, struct cpumask *pd_cpus,
+eenv_pd_max_util(struct energy_env *eenv, struct perf_domain *pd,
 		 struct task_struct *p, int dst_cpu)
 {
-	unsigned long max_util = 0;
-	int cpu;
+	unsigned long max_util = eenv->pd_max_util;
 
-	for_each_cpu(cpu, pd_cpus) {
-		struct task_struct *tsk = (cpu == dst_cpu) ? p : NULL;
-		unsigned long util = cpu_util(cpu, p, dst_cpu, 1);
+	if (dst_cpu >= 0 && cpumask_test_cpu(dst_cpu, perf_domain_span(pd))) {
+		unsigned long util = cpu_util(dst_cpu, p, dst_cpu, 1);
 		unsigned long eff_util, min, max;
 
-		/*
-		 * Performance domain frequency: utilization clamping
-		 * must be considered since it affects the selection
-		 * of the performance domain frequency.
-		 * NOTE: in case RT tasks are running, by default the min
-		 * utilization can be max OPP.
-		 */
-		eff_util = effective_cpu_util(cpu, util, &min, &max);
+		eff_util = effective_cpu_util(dst_cpu, util, &min, &max);
 
 		/* Task's uclamp can modify min and max value */
-		if (tsk && uclamp_is_used()) {
+		if (uclamp_is_used()) {
 			min = max(min, uclamp_eff_value(p, UCLAMP_MIN));
 
 			/*
 			 * If there is no active max uclamp constraint,
 			 * directly use task's one, otherwise keep max.
 			 */
-			if (uclamp_rq_is_idle(cpu_rq(cpu)))
+			if (uclamp_rq_is_idle(cpu_rq(dst_cpu)))
 				max = uclamp_eff_value(p, UCLAMP_MAX);
 			else
 				max = max(max, uclamp_eff_value(p, UCLAMP_MAX));
 		}
 
-		eff_util = sugov_effective_cpu_perf(cpu, eff_util, min, max);
+		eff_util = sugov_effective_cpu_perf(dst_cpu, eff_util, min, max);
 		max_util = max(max_util, eff_util);
 	}
 
@@ -8265,7 +8257,7 @@ static inline unsigned long
 compute_energy(struct energy_env *eenv, struct perf_domain *pd,
 	       struct cpumask *pd_cpus, struct task_struct *p, int dst_cpu)
 {
-	unsigned long max_util = eenv_pd_max_util(eenv, pd_cpus, p, dst_cpu);
+	unsigned long max_util = eenv_pd_max_util(eenv, pd, p, dst_cpu);
 	unsigned long busy_time = eenv->pd_busy_time;
 	unsigned long energy;
 
@@ -8376,12 +8368,20 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
 
 		eenv.cpu_cap = cpu_actual_cap;
 		eenv.pd_cap = 0;
+		eenv.pd_max_util = 0;
 
 		for_each_cpu(cpu, cpus) {
 			struct rq *rq = cpu_rq(cpu);
+			unsigned long util_b, eff_util_b, min_b, max_b;
 
 			eenv.pd_cap += cpu_actual_cap;
 
+			/* Pre-calculate base max utilization for the performance domain */
+			util_b = cpu_util(cpu, p, -1, 1);
+			eff_util_b = effective_cpu_util(cpu, util_b, &min_b, &max_b);
+			eff_util_b = sugov_effective_cpu_perf(cpu, eff_util_b, min_b, max_b);
+			eenv.pd_max_util = max(eenv.pd_max_util, eff_util_b);
+
 			if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
 				continue;
 
-- 
2.51.0