Subsequent changes to assign "sd->shared" from "s_data" would
necessitate finding the topmost SD_SHARE_LLC to assign shared object to.
This is very similar to the "imb_numa_nr" computation loop except that
"imb_numa_nr" cares about the first domain without the SD_SHARE_LLC flag
(immediate parent of sd_llc) whereas the "sd->shared" assignment would
require sd_llc itself.
Extract the "imb_numa_nr" calculation into a helper
adjust_numa_imbalance() and use the current loop in the
build_sched_domains() to find the sd_llc.
While at it, guard the call behind CONFIG_NUMA's status since
"imb_numa_nr" only makes sense on NUMA enabled configs with SD_NUMA
domains.
No functional changes intended.
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
Changelog v3..v4:
o New patch based on the suggestion from Valentin and Chenyu in
https://lore.kernel.org/lkml/xhsmh343e43fd.mognet@vschneid-thinkpadt14sgen2i.remote.csb/
Notable deviation is moving the entire "imb_numa_nr" loop into the
adjust_numa_imbalance() helper to keep all the bits in one place
instead of passing "imb" and "imb_span" as references to the helper.
o Guarded the call behind CONFIG_NUMA's status to save overhead when
NUMA domains don't exist.
---
kernel/sched/topology.c | 133 ++++++++++++++++++++++++----------------
1 file changed, 80 insertions(+), 53 deletions(-)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 34b20b0e1867..7f25c784c038 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2551,6 +2551,74 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
return true;
}
+/*
+ * Calculate an allowed NUMA imbalance such that LLCs do not get
+ * imbalanced.
+ */
+static void adjust_numa_imbalance(struct sched_domain *sd_llc)
+{
+ struct sched_domain *parent;
+ unsigned int imb_span = 1;
+ unsigned int imb = 0;
+ unsigned int nr_llcs;
+
+ WARN_ON(!(sd_llc->flags & SD_SHARE_LLC));
+ WARN_ON(!sd_llc->parent);
+
+ /*
+ * For a single LLC per node, allow an
+ * imbalance up to 12.5% of the node. This is
+ * arbitrary cutoff based two factors -- SMT and
+ * memory channels. For SMT-2, the intent is to
+ * avoid premature sharing of HT resources but
+ * SMT-4 or SMT-8 *may* benefit from a different
+ * cutoff. For memory channels, this is a very
+ * rough estimate of how many channels may be
+ * active and is based on recent CPUs with
+ * many cores.
+ *
+ * For multiple LLCs, allow an imbalance
+ * until multiple tasks would share an LLC
+ * on one node while LLCs on another node
+ * remain idle. This assumes that there are
+ * enough logical CPUs per LLC to avoid SMT
+ * factors and that there is a correlation
+ * between LLCs and memory channels.
+ */
+ nr_llcs = sd_llc->parent->span_weight / sd_llc->span_weight;
+ if (nr_llcs == 1)
+ imb = sd_llc->parent->span_weight >> 3;
+ else
+ imb = nr_llcs;
+
+ imb = max(1U, imb);
+ sd_llc->parent->imb_numa_nr = imb;
+
+ /*
+ * Set span based on the first NUMA domain.
+ *
+ * NUMA systems always add a NODE domain before
+ * iterating the NUMA domains. Since this is before
+ * degeneration, start from sd_llc's parent's
+ * parent which is the lowest an SD_NUMA domain can
+ * be relative to sd_llc.
+ */
+ parent = sd_llc->parent->parent;
+ while (parent && !(parent->flags & SD_NUMA))
+ parent = parent->parent;
+
+ imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight;
+
+ /* Update the upper remainder of the topology */
+ parent = sd_llc->parent;
+ while (parent) {
+ int factor = max(1U, (parent->span_weight / imb_span));
+
+ parent->imb_numa_nr = imb * factor;
+ parent = parent->parent;
+ }
+}
+
/*
* Build sched domains for a given set of CPUs and attach the sched domains
* to the individual CPUs
@@ -2608,62 +2676,21 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
}
}
- /*
- * Calculate an allowed NUMA imbalance such that LLCs do not get
- * imbalanced.
- */
for_each_cpu(i, cpu_map) {
- unsigned int imb = 0;
- unsigned int imb_span = 1;
+ sd = *per_cpu_ptr(d.sd, i);
+ if (!sd)
+ continue;
- for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
- struct sched_domain *child = sd->child;
-
- if (!(sd->flags & SD_SHARE_LLC) && child &&
- (child->flags & SD_SHARE_LLC)) {
- struct sched_domain __rcu *top_p;
- unsigned int nr_llcs;
-
- /*
- * For a single LLC per node, allow an
- * imbalance up to 12.5% of the node. This is
- * arbitrary cutoff based two factors -- SMT and
- * memory channels. For SMT-2, the intent is to
- * avoid premature sharing of HT resources but
- * SMT-4 or SMT-8 *may* benefit from a different
- * cutoff. For memory channels, this is a very
- * rough estimate of how many channels may be
- * active and is based on recent CPUs with
- * many cores.
- *
- * For multiple LLCs, allow an imbalance
- * until multiple tasks would share an LLC
- * on one node while LLCs on another node
- * remain idle. This assumes that there are
- * enough logical CPUs per LLC to avoid SMT
- * factors and that there is a correlation
- * between LLCs and memory channels.
- */
- nr_llcs = sd->span_weight / child->span_weight;
- if (nr_llcs == 1)
- imb = sd->span_weight >> 3;
- else
- imb = nr_llcs;
- imb = max(1U, imb);
- sd->imb_numa_nr = imb;
-
- /* Set span based on the first NUMA domain. */
- top_p = sd->parent;
- while (top_p && !(top_p->flags & SD_NUMA)) {
- top_p = top_p->parent;
- }
- imb_span = top_p ? top_p->span_weight : sd->span_weight;
- } else {
- int factor = max(1U, (sd->span_weight / imb_span));
+ /* First, find the topmost SD_SHARE_LLC domain */
+ while (sd->parent && (sd->parent->flags & SD_SHARE_LLC))
+ sd = sd->parent;
- sd->imb_numa_nr = imb * factor;
- }
- }
+ /*
+ * In presence of higher domains, adjust the
+ * NUMA imbalance stats for the hierarchy.
+ */
+ if (IS_ENABLED(CONFIG_NUMA) && (sd->flags & SD_SHARE_LLC) && sd->parent)
+ adjust_numa_imbalance(sd);
}
/* Calculate CPU capacity for physical packages and nodes */
--
2.34.1
On 12.03.26 05:44, K Prateek Nayak wrote:
[...]
> +/*
> + * Calculate an allowed NUMA imbalance such that LLCs do not get
> + * imbalanced.
> + */
> +static void adjust_numa_imbalance(struct sched_domain *sd_llc)
> +{
> + struct sched_domain *parent;
> + unsigned int imb_span = 1;
> + unsigned int imb = 0;
> + unsigned int nr_llcs;
> +
> + WARN_ON(!(sd_llc->flags & SD_SHARE_LLC));
> + WARN_ON(!sd_llc->parent);
> +
> + /*
> + * For a single LLC per node, allow an
> + * imbalance up to 12.5% of the node. This is
> + * arbitrary cutoff based two factors -- SMT and
> + * memory channels. For SMT-2, the intent is to
> + * avoid premature sharing of HT resources but
> + * SMT-4 or SMT-8 *may* benefit from a different
> + * cutoff. For memory channels, this is a very
> + * rough estimate of how many channels may be
> + * active and is based on recent CPUs with
> + * many cores.
> + *
> + * For multiple LLCs, allow an imbalance
> + * until multiple tasks would share an LLC
> + * on one node while LLCs on another node
> + * remain idle. This assumes that there are
> + * enough logical CPUs per LLC to avoid SMT
> + * factors and that there is a correlation
> + * between LLCs and memory channels.
> + */
> + nr_llcs = sd_llc->parent->span_weight / sd_llc->span_weight;
> + if (nr_llcs == 1)
> + imb = sd_llc->parent->span_weight >> 3;
> + else
> + imb = nr_llcs;
> +
> + imb = max(1U, imb);
> + sd_llc->parent->imb_numa_nr = imb;
Here you set imb_numa_nr e.g. for PKG ...
> +
> + /*
> + * Set span based on the first NUMA domain.
> + *
> + * NUMA systems always add a NODE domain before
> + * iterating the NUMA domains. Since this is before
> + * degeneration, start from sd_llc's parent's
> + * parent which is the lowest an SD_NUMA domain can
> + * be relative to sd_llc.
> + */
> + parent = sd_llc->parent->parent;
> + while (parent && !(parent->flags & SD_NUMA))
> + parent = parent->parent;
> +
> + imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight;
> +
> + /* Update the upper remainder of the topology */
> + parent = sd_llc->parent;
> + while (parent) {
> + int factor = max(1U, (parent->span_weight / imb_span));
> +
> + parent->imb_numa_nr = imb * factor;
... and here again.
Shouldn't we only set it for 'if (parent->flags & SD_NUMA)'?
Not sure if there are cases in which PKG would persist in
... -> MC -> PKG -> NODE -> NUMA -> ... ?
Although access to sd->imb_numa_nr seems to be guarded by sd->flags &
SD_NUMA.
> + parent = parent->parent;
> + }
> +}
> +
[...]
Hello Dietmar,
On 3/16/2026 5:48 AM, Dietmar Eggemann wrote:
>> + /*
>> + * For a single LLC per node, allow an
>> + * imbalance up to 12.5% of the node. This is
>> + * arbitrary cutoff based two factors -- SMT and
>> + * memory channels. For SMT-2, the intent is to
>> + * avoid premature sharing of HT resources but
>> + * SMT-4 or SMT-8 *may* benefit from a different
>> + * cutoff. For memory channels, this is a very
>> + * rough estimate of how many channels may be
>> + * active and is based on recent CPUs with
>> + * many cores.
>> + *
>> + * For multiple LLCs, allow an imbalance
>> + * until multiple tasks would share an LLC
>> + * on one node while LLCs on another node
>> + * remain idle. This assumes that there are
>> + * enough logical CPUs per LLC to avoid SMT
>> + * factors and that there is a correlation
>> + * between LLCs and memory channels.
>> + */
>> + nr_llcs = sd_llc->parent->span_weight / sd_llc->span_weight;
>> + if (nr_llcs == 1)
>> + imb = sd_llc->parent->span_weight >> 3;
>> + else
>> + imb = nr_llcs;
>> +
>> + imb = max(1U, imb);
>> + sd_llc->parent->imb_numa_nr = imb;
>
> Here you set imb_numa_nr e.g. for PKG ...
Ack! That is indeed a redundant assign since it gets reassigned
in the bottom loop. For this commit, we have kept it 1:1 with the
loop that existed before in build_sched_domains().
>
>> +
>> + /*
>> + * Set span based on the first NUMA domain.
>> + *
>> + * NUMA systems always add a NODE domain before
>> + * iterating the NUMA domains. Since this is before
>> + * degeneration, start from sd_llc's parent's
>> + * parent which is the lowest an SD_NUMA domain can
>> + * be relative to sd_llc.
>> + */
>> + parent = sd_llc->parent->parent;
>> + while (parent && !(parent->flags & SD_NUMA))
>> + parent = parent->parent;
>> +
>> + imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight;
>> +
>> + /* Update the upper remainder of the topology */
>> + parent = sd_llc->parent;
>> + while (parent) {
>> + int factor = max(1U, (parent->span_weight / imb_span));
>> +
>> + parent->imb_numa_nr = imb * factor;
>
> ... and here again.
>
> Shouldn't we only set it for 'if (parent->flags & SD_NUMA)'?
>
> Not sure if there are case in which PKG would persist in
>
> ... -> MC -> PKG -> NODE -> NUMA -> ... ?
>
> Although access to sd->imb_numa_nr seems to be guarded by sd->flags &
> SD_NUMA.
Indeed! "imb_numa_nr" only makes sense when looking at NUMA domains
and having it assigned to 1 for lower domains is harmless
(but wasteful indeed). I'm 99% sure we can simply do:
(Only build tested)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 43150591914b..e9068a809dbc 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2623,9 +2623,6 @@ static void adjust_numa_imbalance(struct sched_domain *sd_llc)
else
imb = nr_llcs;
- imb = max(1U, imb);
- sd_llc->parent->imb_numa_nr = imb;
-
/*
* Set span based on the first NUMA domain.
*
@@ -2639,10 +2636,14 @@ static void adjust_numa_imbalance(struct sched_domain *sd_llc)
while (parent && !(parent->flags & SD_NUMA))
parent = parent->parent;
- imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight;
+ /* No NUMA domain to adjust imbalance for! */
+ if (!parent)
+ return;
+
+ imb = max(1U, imb);
+ imb_span = parent->span_weight;
/* Update the upper remainder of the topology */
- parent = sd_llc->parent;
while (parent) {
int factor = max(1U, (parent->span_weight / imb_span));
---
If we have NUMA domains, we definitely have NODE and NODE sets neither
SD_SHARE_LLC, nor SD_NUMA so likely sd->parent is PKG / NODE domain and
NUMA has to start at sd->parent->parent and it has to break at the first
SD_NUMA domains.
If it doesn't exist, we don't have any NUMA domains and nothing to worry
about, and if we do, the final loop will adjust the NUMA imbalance.
Thoughts? Again, this commit was kept 1:1 with the previous loop but we
can always improve :-)
>
>> + parent = parent->parent;
>> + }
>> +}
>> +
> [...]
--
Thanks and Regards,
Prateek
Hi Prateek,
On 16.03.26 04:41, K Prateek Nayak wrote:
> Hello Dietmar,
>
> On 3/16/2026 5:48 AM, Dietmar Eggemann wrote:
[...]
> Indeed! "imb_numa_nr" only makes sense when looking at NUMA domains
> and having it assigned to 1 for lower domains is harmless
> (but wasteful indeed). I'm 99% sure we can simply do:
>
> (Only build tested)
>
> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 43150591914b..e9068a809dbc 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -2623,9 +2623,6 @@ static void adjust_numa_imbalance(struct sched_domain *sd_llc)
> else
> imb = nr_llcs;
>
> - imb = max(1U, imb);
> - sd_llc->parent->imb_numa_nr = imb;
> -
> /*
> * Set span based on the first NUMA domain.
> *
> @@ -2639,10 +2636,14 @@ static void adjust_numa_imbalance(struct sched_domain *sd_llc)
> while (parent && !(parent->flags & SD_NUMA))
> parent = parent->parent;
>
> - imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight;
> + /* No NUMA domain to adjust imbalance for! */
> + if (!parent)
> + return;
> +
> + imb = max(1U, imb);
> + imb_span = parent->span_weight;
>
> /* Update the upper remainder of the topology */
> - parent = sd_llc->parent;
> while (parent) {
> int factor = max(1U, (parent->span_weight / imb_span));
>
> ---
>
> If we have NUMA domains, we definitely have NODE and NODE sets neither
> SD_SHARE_LLC, nor SD_NUMA so likely sd->parent is PKG / NODE domain and
> NUMA has to start at sd->parent->parent and it has to break at the first
> SD_NUMA domains.
>
> If it doesn't exist, we don't have any NUMA domains and nothing to worry
> about, and if we do, the final loop will adjust the NUMA imbalance.
>
> Thoughts? Again, this commit was kept 1:1 with the previous loop but we
> can always improve :-)
Ah, I see!
This would work, IMHO.
Tested on qemu-system-aarch64 w/
-smp 8,sockets=2,clusters=2,cores=2,threads=1
Are you aware of a setup in which PKG would survive between MC and
lowest NUMA?
Hello Dietmar,
On 3/16/2026 1:54 PM, Dietmar Eggemann wrote:
> Hi Prateek,
>
> On 16.03.26 04:41, K Prateek Nayak wrote:
>> Hello Dietmar,
>>
>> On 3/16/2026 5:48 AM, Dietmar Eggemann wrote:
>
> [...]
>
>> Indeed! "imb_numa_nr" only makes sense when looking at NUMA domains
>> and having it assigned to 1 for lower domains is harmless
>> (but wasteful indeed). I'm 99% sure we can simply do:
>>
>> (Only build tested)
>>
>> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
>> index 43150591914b..e9068a809dbc 100644
>> --- a/kernel/sched/topology.c
>> +++ b/kernel/sched/topology.c
>> @@ -2623,9 +2623,6 @@ static void adjust_numa_imbalance(struct sched_domain *sd_llc)
>> else
>> imb = nr_llcs;
>>
>> - imb = max(1U, imb);
>> - sd_llc->parent->imb_numa_nr = imb;
>> -
>> /*
>> * Set span based on the first NUMA domain.
>> *
>> @@ -2639,10 +2636,14 @@ static void adjust_numa_imbalance(struct sched_domain *sd_llc)
>> while (parent && !(parent->flags & SD_NUMA))
>> parent = parent->parent;
>>
>> - imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight;
>> + /* No NUMA domain to adjust imbalance for! */
>> + if (!parent)
>> + return;
>> +
>> + imb = max(1U, imb);
>> + imb_span = parent->span_weight;
>>
>> /* Update the upper remainder of the topology */
>> - parent = sd_llc->parent;
>> while (parent) {
>> int factor = max(1U, (parent->span_weight / imb_span));
>>
>> ---
>>
>> If we have NUMA domains, we definitely have NODE and NODE sets neither
>> SD_SHARE_LLC, nor SD_NUMA so likely sd->parent is PKG / NODE domain and
>> NUMA has to start at sd->parent->parent and it has to break at the first
>> SD_NUMA domains.
>>
>> If it doesn't exist, we don't have any NUMA domains and nothing to worry
>> about, and if we do, the final loop will adjust the NUMA imbalance.
>>
>> Thoughts? Again, this commit was kept 1:1 with the previous loop but we
>> can always improve :-)
> Ah, I see!
>
> This would work, IMHO.
>
> Tested on qemu-system-aarch64 w/
>
> -smp 8,sockets=2,clusters=2,cores=2,threads=1
>
> Are you aware of a setup in which PKG would survive between MC and
> lowest NUMA?
On x86, you can have:
-smp 8,sockets=2,dies=2,cores=2,threads=1
and each "die" will appear as an MC within the socket so we get
NUMA { 0-7 }
NODE { 0-3 } { 4-7 }
PKG { 0-3 } { 4-7 }
MC {0,1} {2,3} {4,5} {6,7}
In the above case, NODE is degenerated since it matches with PKG
and MC, PKG, NUMA survive at the end.
--
Thanks and Regards,
Prateek
Hi Prateek,
kernel test robot noticed the following build warnings:
[auto build test WARNING on 54a66e431eeacf23e1dc47cb3507f2d0c068aaf0]
url: https://github.com/intel-lab-lkp/linux/commits/K-Prateek-Nayak/sched-topology-Compute-sd_weight-considering-cpuset-partitions/20260312-125021
base: 54a66e431eeacf23e1dc47cb3507f2d0c068aaf0
patch link: https://lore.kernel.org/r/20260312044434.1974-3-kprateek.nayak%40amd.com
patch subject: [PATCH v4 2/9] sched/topology: Extract "imb_numa_nr" calculation into a separate helper
config: nios2-randconfig-r131-20260312 (https://download.01.org/0day-ci/archive/20260312/202603122149.xyvcIkPY-lkp@intel.com/config)
compiler: nios2-linux-gcc (GCC) 8.5.0
sparse: v0.6.5-rc1
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260312/202603122149.xyvcIkPY-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202603122149.xyvcIkPY-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
kernel/sched/build_utility.c: note: in included file:
kernel/sched/debug.c:730:17: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/debug.c:730:17: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/debug.c:730:17: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/debug.c:1069:9: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct task_struct *tsk @@ got struct task_struct [noderef] __rcu *curr @@
kernel/sched/debug.c:1069:9: sparse: expected struct task_struct *tsk
kernel/sched/debug.c:1069:9: sparse: got struct task_struct [noderef] __rcu *curr
kernel/sched/debug.c:1069:9: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct task_struct *tsk @@ got struct task_struct [noderef] __rcu *curr @@
kernel/sched/debug.c:1069:9: sparse: expected struct task_struct *tsk
kernel/sched/debug.c:1069:9: sparse: got struct task_struct [noderef] __rcu *curr
kernel/sched/build_utility.c: note: in included file:
kernel/sched/stats.c:136:17: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/stats.c:136:17: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/stats.c:136:17: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/build_utility.c: note: in included file:
kernel/sched/topology.c:116:56: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:116:56: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:116:56: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:135:60: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:135:60: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:135:60: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:158:20: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:158:20: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:158:20: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:469:19: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct perf_domain *pd @@ got struct perf_domain [noderef] __rcu *pd @@
kernel/sched/topology.c:469:19: sparse: expected struct perf_domain *pd
kernel/sched/topology.c:469:19: sparse: got struct perf_domain [noderef] __rcu *pd
kernel/sched/topology.c:644:49: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain *parent @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:644:49: sparse: expected struct sched_domain *parent
kernel/sched/topology.c:644:49: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:729:50: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain *parent @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:729:50: sparse: expected struct sched_domain *parent
kernel/sched/topology.c:729:50: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:737:55: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain [noderef] __rcu *[noderef] __rcu child @@ got struct sched_domain *[assigned] tmp @@
kernel/sched/topology.c:737:55: sparse: expected struct sched_domain [noderef] __rcu *[noderef] __rcu child
kernel/sched/topology.c:737:55: sparse: got struct sched_domain *[assigned] tmp
kernel/sched/topology.c:750:29: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] tmp @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:750:29: sparse: expected struct sched_domain *[assigned] tmp
kernel/sched/topology.c:750:29: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:755:20: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:755:20: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:755:20: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:776:13: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] tmp @@ got struct sched_domain [noderef] __rcu *sd @@
kernel/sched/topology.c:776:13: sparse: expected struct sched_domain *[assigned] tmp
kernel/sched/topology.c:776:13: sparse: got struct sched_domain [noderef] __rcu *sd
kernel/sched/topology.c:938:70: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:938:70: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:938:70: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:967:59: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:967:59: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:967:59: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1013:57: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1013:57: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:1013:57: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1015:25: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *sibling @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1015:25: sparse: expected struct sched_domain *sibling
kernel/sched/topology.c:1015:25: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1023:55: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1023:55: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:1023:55: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1025:25: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *sibling @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1025:25: sparse: expected struct sched_domain *sibling
kernel/sched/topology.c:1025:25: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1095:62: sparse: sparse: incorrect type in argument 1 (different address spaces) @@ expected struct sched_domain *sd @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1095:62: sparse: expected struct sched_domain *sd
kernel/sched/topology.c:1095:62: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1199:40: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain *child @@ got struct sched_domain [noderef] __rcu *child @@
kernel/sched/topology.c:1199:40: sparse: expected struct sched_domain *child
kernel/sched/topology.c:1199:40: sparse: got struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1337:9: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:1337:9: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/topology.c:1337:9: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:1683:43: sparse: sparse: incorrect type in initializer (different address spaces) @@ expected struct sched_domain [noderef] __rcu *child @@ got struct sched_domain *child @@
kernel/sched/topology.c:1683:43: sparse: expected struct sched_domain [noderef] __rcu *child
kernel/sched/topology.c:1683:43: sparse: got struct sched_domain *child
kernel/sched/topology.c:2478:31: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain [noderef] __rcu *parent @@ got struct sched_domain *sd @@
kernel/sched/topology.c:2478:31: sparse: expected struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2478:31: sparse: got struct sched_domain *sd
>> kernel/sched/topology.c:2606:16: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *parent @@ got struct sched_domain [noderef] __rcu *[noderef] __rcu parent @@
kernel/sched/topology.c:2606:16: sparse: expected struct sched_domain *parent
kernel/sched/topology.c:2606:16: sparse: got struct sched_domain [noderef] __rcu *[noderef] __rcu parent
>> kernel/sched/topology.c:2608:24: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *parent @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2608:24: sparse: expected struct sched_domain *parent
kernel/sched/topology.c:2608:24: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2613:16: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *parent @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2613:16: sparse: expected struct sched_domain *parent
kernel/sched/topology.c:2613:16: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2618:24: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *parent @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2618:24: sparse: expected struct sched_domain *parent
kernel/sched/topology.c:2618:24: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2667:57: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2667:57: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/topology.c:2667:57: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2686:28: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2686:28: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/topology.c:2686:28: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/topology.c:2701:57: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
kernel/sched/topology.c:2701:57: sparse: expected struct sched_domain *[assigned] sd
kernel/sched/topology.c:2701:57: sparse: got struct sched_domain [noderef] __rcu *parent
kernel/sched/build_utility.c: note: in included file:
kernel/sched/build_utility.c: note: in included file:
kernel/sched/sched.h:2367:25: sparse: sparse: incompatible types in comparison expression (different address spaces):
kernel/sched/sched.h:2367:25: sparse: struct task_struct [noderef] __rcu *
kernel/sched/sched.h:2367:25: sparse: struct task_struct *
vim +2606 kernel/sched/topology.c
2553
2554 /*
2555 * Calculate an allowed NUMA imbalance such that LLCs do not get
2556 * imbalanced.
2557 */
2558 static void adjust_numa_imbalance(struct sched_domain *sd_llc)
2559 {
2560 struct sched_domain *parent;
2561 unsigned int imb_span = 1;
2562 unsigned int imb = 0;
2563 unsigned int nr_llcs;
2564
2565 WARN_ON(!(sd_llc->flags & SD_SHARE_LLC));
2566 WARN_ON(!sd_llc->parent);
2567
2568 /*
2569 * For a single LLC per node, allow an
2570 * imbalance up to 12.5% of the node. This is
2571 * arbitrary cutoff based two factors -- SMT and
2572 * memory channels. For SMT-2, the intent is to
2573 * avoid premature sharing of HT resources but
2574 * SMT-4 or SMT-8 *may* benefit from a different
2575 * cutoff. For memory channels, this is a very
2576 * rough estimate of how many channels may be
2577 * active and is based on recent CPUs with
2578 * many cores.
2579 *
2580 * For multiple LLCs, allow an imbalance
2581 * until multiple tasks would share an LLC
2582 * on one node while LLCs on another node
2583 * remain idle. This assumes that there are
2584 * enough logical CPUs per LLC to avoid SMT
2585 * factors and that there is a correlation
2586 * between LLCs and memory channels.
2587 */
2588 nr_llcs = sd_llc->parent->span_weight / sd_llc->span_weight;
2589 if (nr_llcs == 1)
2590 imb = sd_llc->parent->span_weight >> 3;
2591 else
2592 imb = nr_llcs;
2593
2594 imb = max(1U, imb);
2595 sd_llc->parent->imb_numa_nr = imb;
2596
2597 /*
2598 * Set span based on the first NUMA domain.
2599 *
2600 * NUMA systems always add a NODE domain before
2601 * iterating the NUMA domains. Since this is before
2602 * degeneration, start from sd_llc's parent's
2603 * parent which is the lowest an SD_NUMA domain can
2604 * be relative to sd_llc.
2605 */
> 2606 parent = sd_llc->parent->parent;
2607 while (parent && !(parent->flags & SD_NUMA))
> 2608 parent = parent->parent;
2609
2610 imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight;
2611
2612 /* Update the upper remainder of the topology */
2613 parent = sd_llc->parent;
2614 while (parent) {
2615 int factor = max(1U, (parent->span_weight / imb_span));
2616
2617 parent->imb_numa_nr = imb * factor;
2618 parent = parent->parent;
2619 }
2620 }
2621
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On 3/12/2026 7:07 PM, kernel test robot wrote:
> sparse warnings: (new ones prefixed by >>)
> kernel/sched/build_utility.c: note: in included file:
> kernel/sched/debug.c:730:17: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@
So what is our official stance on sparse in the sched bits? Because I
can make this go away with:
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 963007d83216..7bf1f830067f 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2591,7 +2591,7 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
*/
static void adjust_numa_imbalance(struct sched_domain *sd_llc)
{
- struct sched_domain *parent;
+ struct sched_domain __rcu *parent;
unsigned int imb_span = 1;
unsigned int imb = 0;
unsigned int nr_llcs;
---
But I can make a ton more go away by doing:
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 51c29581f15e..7d1efd981caf 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -72,8 +72,8 @@ struct sched_domain_shared {
struct sched_domain {
/* These fields must be setup */
- struct sched_domain __rcu *parent; /* top domain must be null terminated */
- struct sched_domain __rcu *child; /* bottom domain must be null terminated */
+ struct sched_domain *parent; /* top domain must be null terminated */
+ struct sched_domain *child; /* bottom domain must be null terminated */
struct sched_group *groups; /* the balancing groups of the domain */
unsigned long min_interval; /* Minimum balance interval ms */
unsigned long max_interval; /* Maximum balance interval ms */
---
"__rcu" evaluates to "noderef, address_space(__rcu)" but we do end up
dereferencing a bunch of these directly (like sd->parent->parent) but
noderef suggests that is illegal?
One place this probably helps is in spotting cases where a pointer *needs*
to be accessed via rcu_dereference*() but isn't - that is indeed nice
to have but ...
Then it also complains about using rcu_dereference*() on pointers that
aren't __rcu annotated, but perhaps that is solvable (although some of it
isn't very pretty, like "cpumask ** __rcu *sched_domains_numa_masks").
--
Thanks and Regards,
Prateek
On Thu, Mar 12, 2026 at 09:12:50PM +0530, K Prateek Nayak wrote: > On 3/12/2026 7:07 PM, kernel test robot wrote: > > sparse warnings: (new ones prefixed by >>) > > kernel/sched/build_utility.c: note: in included file: > > kernel/sched/debug.c:730:17: sparse: sparse: incorrect type in assignment (different address spaces) @@ expected struct sched_domain *[assigned] sd @@ got struct sched_domain [noderef] __rcu *parent @@ > > So what is out official stance on sparse in the sched bits? Because I > can make this go away with: I take patches for correctness :-) I do not take patches that don't affect correctness but make the code unreadable -- there was a submission along those lines recently. I can be convinced to take patches in the middle provided they don't affect readability too much.
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 5a7b576b3ec1acc2694c5b58f80cd1d44a11b2c1
Gitweb: https://git.kernel.org/tip/5a7b576b3ec1acc2694c5b58f80cd1d44a11b2c1
Author: K Prateek Nayak <kprateek.nayak@amd.com>
AuthorDate: Thu, 12 Mar 2026 04:44:27
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 18 Mar 2026 09:06:48 +01:00
sched/topology: Extract "imb_numa_nr" calculation into a separate helper
Subsequent changes to assign "sd->shared" from "s_data" would
necessitate finding the topmost SD_SHARE_LLC to assign shared object to.
This is very similar to the "imb_numa_nr" computation loop except that
"imb_numa_nr" cares about the first domain without the SD_SHARE_LLC flag
(immediate parent of sd_llc) whereas the "sd->shared" assignment would
require sd_llc itself.
Extract the "imb_numa_nr" calculation into a helper
adjust_numa_imbalance() and use the current loop in the
build_sched_domains() to find the sd_llc.
While at it, guard the call behind CONFIG_NUMA's status since
"imb_numa_nr" only makes sense on NUMA enabled configs with SD_NUMA
domains.
No functional changes intended.
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Tested-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Link: https://patch.msgid.link/20260312044434.1974-3-kprateek.nayak@amd.com
---
kernel/sched/topology.c | 133 +++++++++++++++++++++++----------------
1 file changed, 80 insertions(+), 53 deletions(-)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 79bab80..6303790 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2550,6 +2550,74 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
}
/*
+ * Calculate an allowed NUMA imbalance such that LLCs do not get
+ * imbalanced.
+ */
+static void adjust_numa_imbalance(struct sched_domain *sd_llc)
+{
+ struct sched_domain *parent;
+ unsigned int imb_span = 1;
+ unsigned int imb = 0;
+ unsigned int nr_llcs;
+
+ WARN_ON(!(sd_llc->flags & SD_SHARE_LLC));
+ WARN_ON(!sd_llc->parent);
+
+ /*
+ * For a single LLC per node, allow an
+ * imbalance up to 12.5% of the node. This is
+ * arbitrary cutoff based two factors -- SMT and
+ * memory channels. For SMT-2, the intent is to
+ * avoid premature sharing of HT resources but
+ * SMT-4 or SMT-8 *may* benefit from a different
+ * cutoff. For memory channels, this is a very
+ * rough estimate of how many channels may be
+ * active and is based on recent CPUs with
+ * many cores.
+ *
+ * For multiple LLCs, allow an imbalance
+ * until multiple tasks would share an LLC
+ * on one node while LLCs on another node
+ * remain idle. This assumes that there are
+ * enough logical CPUs per LLC to avoid SMT
+ * factors and that there is a correlation
+ * between LLCs and memory channels.
+ */
+ nr_llcs = sd_llc->parent->span_weight / sd_llc->span_weight;
+ if (nr_llcs == 1)
+ imb = sd_llc->parent->span_weight >> 3;
+ else
+ imb = nr_llcs;
+
+ imb = max(1U, imb);
+ sd_llc->parent->imb_numa_nr = imb;
+
+ /*
+ * Set span based on the first NUMA domain.
+ *
+ * NUMA systems always add a NODE domain before
+ * iterating the NUMA domains. Since this is before
+ * degeneration, start from sd_llc's parent's
+ * parent which is the lowest an SD_NUMA domain can
+ * be relative to sd_llc.
+ */
+ parent = sd_llc->parent->parent;
+ while (parent && !(parent->flags & SD_NUMA))
+ parent = parent->parent;
+
+ imb_span = parent ? parent->span_weight : sd_llc->parent->span_weight;
+
+ /* Update the upper remainder of the topology */
+ parent = sd_llc->parent;
+ while (parent) {
+ int factor = max(1U, (parent->span_weight / imb_span));
+
+ parent->imb_numa_nr = imb * factor;
+ parent = parent->parent;
+ }
+}
+
+/*
* Build sched domains for a given set of CPUs and attach the sched domains
* to the individual CPUs
*/
@@ -2606,62 +2674,21 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
}
}
- /*
- * Calculate an allowed NUMA imbalance such that LLCs do not get
- * imbalanced.
- */
for_each_cpu(i, cpu_map) {
- unsigned int imb = 0;
- unsigned int imb_span = 1;
+ sd = *per_cpu_ptr(d.sd, i);
+ if (!sd)
+ continue;
- for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
- struct sched_domain *child = sd->child;
-
- if (!(sd->flags & SD_SHARE_LLC) && child &&
- (child->flags & SD_SHARE_LLC)) {
- struct sched_domain __rcu *top_p;
- unsigned int nr_llcs;
-
- /*
- * For a single LLC per node, allow an
- * imbalance up to 12.5% of the node. This is
- * arbitrary cutoff based two factors -- SMT and
- * memory channels. For SMT-2, the intent is to
- * avoid premature sharing of HT resources but
- * SMT-4 or SMT-8 *may* benefit from a different
- * cutoff. For memory channels, this is a very
- * rough estimate of how many channels may be
- * active and is based on recent CPUs with
- * many cores.
- *
- * For multiple LLCs, allow an imbalance
- * until multiple tasks would share an LLC
- * on one node while LLCs on another node
- * remain idle. This assumes that there are
- * enough logical CPUs per LLC to avoid SMT
- * factors and that there is a correlation
- * between LLCs and memory channels.
- */
- nr_llcs = sd->span_weight / child->span_weight;
- if (nr_llcs == 1)
- imb = sd->span_weight >> 3;
- else
- imb = nr_llcs;
- imb = max(1U, imb);
- sd->imb_numa_nr = imb;
-
- /* Set span based on the first NUMA domain. */
- top_p = sd->parent;
- while (top_p && !(top_p->flags & SD_NUMA)) {
- top_p = top_p->parent;
- }
- imb_span = top_p ? top_p->span_weight : sd->span_weight;
- } else {
- int factor = max(1U, (sd->span_weight / imb_span));
+ /* First, find the topmost SD_SHARE_LLC domain */
+ while (sd->parent && (sd->parent->flags & SD_SHARE_LLC))
+ sd = sd->parent;
- sd->imb_numa_nr = imb * factor;
- }
- }
+ /*
+ * In presence of higher domains, adjust the
+ * NUMA imbalance stats for the hierarchy.
+ */
+ if (IS_ENABLED(CONFIG_NUMA) && (sd->flags & SD_SHARE_LLC) && sd->parent)
+ adjust_numa_imbalance(sd);
}
/* Calculate CPU capacity for physical packages and nodes */
© 2016 - 2026 Red Hat, Inc.