Leon noted a topology_span_sane() warning in their guest deployment
starting from v6.16-rc1 [1]. The debugging that followed pointed to
tl->mask() for the NODE domain being incorrectly resolved to that of the
highest NUMA domain.
tl->mask() for NODE is set to the sd_numa_mask() which depends on the
global "sched_domains_curr_level" hack. "sched_domains_curr_level" is
set to the "tl->numa_level" during tl traversal in build_sched_domains()
calling sd_init() but was not reset before topology_span_sane().
Since "tl->numa_level" still reflected the old value from
build_sched_domains(), topology_span_sane() for the NODE domain trips
when the span of the last NUMA domain overlaps.
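
For reference, this is (roughly) the pre-fix code in kernel/sched/topology.c
that the above describes, trimmed down to the relevant lines; the same lines
appear in the removal hunks of the follow-up diff later in this thread.
topology_span_sane() never goes through sd_init(), so the global keeps
whatever value the previous build_sched_domains() pass left behind:

  static int sched_domains_curr_level;

  static const struct cpumask *sd_numa_mask(int cpu)
  {
          return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
  }

  static struct sched_domain *
  sd_init(struct sched_domain_topology_level *tl, ...)
  {
  #ifdef CONFIG_NUMA
          /*
           * Ugly hack to pass state to sd_numa_mask()...
           */
          sched_domains_curr_level = tl->numa_level;
  #endif
          ...
  }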
Instead of replicating the "sched_domains_curr_level" hack, Valentin
suggested using the spans from the sched_domain objects constructed
during build_sched_domains() which can also catch overlaps when the
domain spans are fixed up by build_sched_domain().
Since build_sched_domain() is skipped when tl->mask() of a child domain
already covers the entire cpumap, skip the domains that have an empty
span.
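
The crux of the change (the complete diff is below) is that
topology_span_sane() now reads the span off the per-CPU sched_domain
objects and skips levels whose span is empty, along the lines of:

  struct sd_data *sdd = &tl->data;
  struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
  struct cpumask *sd_span = sched_domain_span(sd);

  /* lowest bit set in this mask is used as a unique id */
  id = cpumask_first(sd_span);

  /* empty span: build_sched_domain() stopped at a lower level for this CPU */
  if (id >= nr_cpu_ids)
          continue;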
The original warning was reproducible on the following NUMA topology
reported by Leon:
$ sudo numactl -H
available: 5 nodes (0-4)
node 0 cpus: 0 1
node 0 size: 2927 MB
node 0 free: 1603 MB
node 1 cpus: 2 3
node 1 size: 3023 MB
node 1 free: 3008 MB
node 2 cpus: 4 5
node 2 size: 3023 MB
node 2 free: 3007 MB
node 3 cpus: 6 7
node 3 size: 3023 MB
node 3 free: 3002 MB
node 4 cpus: 8 9
node 4 size: 3022 MB
node 4 free: 2718 MB
node distances:
node   0   1   2   3   4
  0:  10  39  38  37  36
  1:  39  10  38  37  36
  2:  38  38  10  37  36
  3:  37  37  37  10  36
  4:  36  36  36  36  10
The above topology can be mimicked using the following QEMU cmd that was
used to reproduce the warning and test the fix:
sudo qemu-system-x86_64 -enable-kvm -cpu host \
-m 20G -smp cpus=10,sockets=10 -machine q35 \
-object memory-backend-ram,size=4G,id=m0 \
-object memory-backend-ram,size=4G,id=m1 \
-object memory-backend-ram,size=4G,id=m2 \
-object memory-backend-ram,size=4G,id=m3 \
-object memory-backend-ram,size=4G,id=m4 \
-numa node,cpus=0-1,memdev=m0,nodeid=0 \
-numa node,cpus=2-3,memdev=m1,nodeid=1 \
-numa node,cpus=4-5,memdev=m2,nodeid=2 \
-numa node,cpus=6-7,memdev=m3,nodeid=3 \
-numa node,cpus=8-9,memdev=m4,nodeid=4 \
-numa dist,src=0,dst=1,val=39 \
-numa dist,src=0,dst=2,val=38 \
-numa dist,src=0,dst=3,val=37 \
-numa dist,src=0,dst=4,val=36 \
-numa dist,src=1,dst=0,val=39 \
-numa dist,src=1,dst=2,val=38 \
-numa dist,src=1,dst=3,val=37 \
-numa dist,src=1,dst=4,val=36 \
-numa dist,src=2,dst=0,val=38 \
-numa dist,src=2,dst=1,val=38 \
-numa dist,src=2,dst=3,val=37 \
-numa dist,src=2,dst=4,val=36 \
-numa dist,src=3,dst=0,val=37 \
-numa dist,src=3,dst=1,val=37 \
-numa dist,src=3,dst=2,val=37 \
-numa dist,src=3,dst=4,val=36 \
-numa dist,src=4,dst=0,val=36 \
-numa dist,src=4,dst=1,val=36 \
-numa dist,src=4,dst=2,val=36 \
-numa dist,src=4,dst=3,val=36 \
...
Suggested-by: Valentin Schneider <vschneid@redhat.com>
Reported-by: Leon Romanovsky <leon@kernel.org>
Closes: https://lore.kernel.org/lkml/20250610110701.GA256154@unreal/ [1]
Fixes: ccf74128d66c ("sched/topology: Assert non-NUMA topology masks don't (partially) overlap") # ce29a7da84cd, f55dac1dafb3
Reviewed-by: Steve Wahl <steve.wahl@hpe.com>
Tested-by: Valentin Schneider <vschneid@redhat.com>
Reviewed-by: Valentin Schneider <vschneid@redhat.com>
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
Changes are based on tip:sched/core at commit 1eec89a67141
("sched/topology: Remove sched_domain_topology_level::flags")
Changelog v4..v5:
o Rebased the series on top of tip:sched/core to resolve conflicts.
v4: https://lore.kernel.org/lkml/20250709161917.14298-1-kprateek.nayak@amd.com/
---
kernel/sched/topology.c | 24 ++++++++++++++++++------
1 file changed, 18 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 977e133bb8a4..980b4b75b656 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -2392,6 +2392,7 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
 	id_seen = sched_domains_tmpmask2;
 
 	for_each_sd_topology(tl) {
+		struct sd_data *sdd = &tl->data;
 		int tl_common_flags = 0;
 
 		if (tl->sd_flags)
@@ -2411,22 +2412,33 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
 		 * breaks the linking done for an earlier span.
 		 */
 		for_each_cpu(cpu, cpu_map) {
-			const struct cpumask *tl_cpu_mask = tl->mask(cpu);
+			struct sched_domain *sd = *per_cpu_ptr(sdd->sd, cpu);
+			struct cpumask *sd_span = sched_domain_span(sd);
 			int id;
 
 			/* lowest bit set in this mask is used as a unique id */
-			id = cpumask_first(tl_cpu_mask);
+			id = cpumask_first(sd_span);
+
+			/*
+			 * Span can be empty if that topology level won't be
+			 * used for this CPU, i.e. a lower level already fully
+			 * describes the topology and build_sched_domain()
+			 * stopped there.
+			 */
+			if (id >= nr_cpu_ids)
+				continue;
 
 			if (cpumask_test_cpu(id, id_seen)) {
-				/* First CPU has already been seen, ensure identical spans */
-				if (!cpumask_equal(tl->mask(id), tl_cpu_mask))
+				/* First CPU has already been seen, ensure identical sd spans */
+				sd = *per_cpu_ptr(sdd->sd, id);
+				if (!cpumask_equal(sched_domain_span(sd), sd_span))
 					return false;
 			} else {
 				/* First CPU hasn't been seen before, ensure it's a completely new span */
-				if (cpumask_intersects(tl_cpu_mask, covered))
+				if (cpumask_intersects(sd_span, covered))
 					return false;
 
-				cpumask_or(covered, covered, tl_cpu_mask);
+				cpumask_or(covered, covered, sd_span);
 				cpumask_set_cpu(id, id_seen);
 			}
 		}
base-commit: 1eec89a671413ce38df9fe9e70f5130a9eb79a59
--
2.34.1
On Tue, Jul 15, 2025 at 04:08:24AM +0000, K Prateek Nayak wrote:
> Leon noted a topology_span_sane() warning in their guest deployment
> starting from v6.16-rc1 [1]. Debug that followed pointed to the
> tl->mask() for the NODE domain being incorrectly resolved to that of the
> highest NUMA domain.
>
> tl->mask() for NODE is set to the sd_numa_mask() which depends on the
> global "sched_domains_curr_level" hack. "sched_domains_curr_level" is
> set to the "tl->numa_level" during tl traversal in build_sched_domains()
> calling sd_init() but was not reset before topology_span_sane().
>
> Since "tl->numa_level" still reflected the old value from
> build_sched_domains(), topology_span_sane() for the NODE domain trips
> when the span of the last NUMA domain overlaps.
>
> Instead of replicating the "sched_domains_curr_level" hack, Valentin
> suggested using the spans from the sched_domain objects constructed
> during build_sched_domains() which can also catch overlaps when the
> domain spans are fixed up by build_sched_domain().
>
> Since build_sched_domain() is skipped when tl->mask() of a child domain
> already covers the entire cpumap, skip the domains that have an empty
> span.

Right, much better explanation than that other patch. And yes, we can do
this.

However, what's the point of having this ->mask() function if we can't
use it and have to hack around it.

We should either set ->mask to NULL for NUMA thingies to make sure we
don't end up using it again, or bite the bullet and fix up the mask
function declaration.

Something a little like so?

---
 arch/powerpc/kernel/smp.c      | 19 +++++++++++------
 arch/s390/kernel/topology.c    | 15 ++++++++-----
 arch/x86/kernel/smpboot.c      | 28 ++++++++++++++++++++----
 include/linux/sched/topology.h |  4 +++-
 include/linux/topology.h       |  2 +-
 kernel/sched/topology.c        | 48 ++++++++++++++++++++++++++----------------
 6 files changed, 80 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index f59e4b9cc207..0b5897fff687 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1028,13 +1028,13 @@ static int powerpc_shared_proc_flags(void)
  * We can't just pass cpu_l2_cache_mask() directly because
  * returns a non-const pointer and the compiler barfs on that.
  */
-static const struct cpumask *shared_cache_mask(int cpu)
+static const struct cpumask *shared_cache_mask(struct sched_domain_topology_level *tl, int cpu)
 {
 	return per_cpu(cpu_l2_cache_map, cpu);
 }
 
 #ifdef CONFIG_SCHED_SMT
-static const struct cpumask *smallcore_smt_mask(int cpu)
+static const struct cpumask *smallcore_smt_mask(struct sched_domain_topology_level *tl, int cpu)
 {
 	return cpu_smallcore_mask(cpu);
 }
@@ -1054,11 +1054,16 @@ static bool has_coregroup_support(void)
 	return coregroup_enabled;
 }
 
-static const struct cpumask *cpu_mc_mask(int cpu)
+static const struct cpumask *cpu_mc_mask(struct sched_domain_topology_level *tl, int cpu)
 {
 	return cpu_coregroup_mask(cpu);
 }
 
+static const struct cpumask *cpu_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_node_mask(cpu);
+}
+
 static int __init init_big_cores(void)
 {
 	int cpu;
@@ -1448,7 +1453,7 @@ static bool update_mask_by_l2(int cpu, cpumask_var_t *mask)
 		return false;
 	}
 
-	cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+	cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu));
 
 	/* Update l2-cache mask with all the CPUs that are part of submask */
 	or_cpumasks_related(cpu, cpu, submask_fn, cpu_l2_cache_mask);
@@ -1538,7 +1543,7 @@ static void update_coregroup_mask(int cpu, cpumask_var_t *mask)
 		return;
 	}
 
-	cpumask_and(*mask, cpu_online_mask, cpu_cpu_mask(cpu));
+	cpumask_and(*mask, cpu_online_mask, cpu_node_mask(cpu));
 
 	/* Update coregroup mask with all the CPUs that are part of submask */
 	or_cpumasks_related(cpu, cpu, submask_fn, cpu_coregroup_mask);
@@ -1601,7 +1606,7 @@ static void add_cpu_to_masks(int cpu)
 
 	/* If chip_id is -1; limit the cpu_core_mask to within PKG */
 	if (chip_id == -1)
-		cpumask_and(mask, mask, cpu_cpu_mask(cpu));
+		cpumask_and(mask, mask, cpu_node_mask(cpu));
 
 	for_each_cpu(i, mask) {
 		if (chip_id == cpu_to_chip_id(i)) {
@@ -1716,7 +1721,7 @@ static void __init build_sched_topology(void)
 			SDTL_INIT(cpu_mc_mask, powerpc_shared_proc_flags, MC);
 	}
 
-	powerpc_topology[i++] = SDTL_INIT(cpu_cpu_mask, powerpc_shared_proc_flags, PKG);
+	powerpc_topology[i++] = SDTL_INIT(cpu_pkg_mask, powerpc_shared_proc_flags, PKG);
 
 	/* There must be one trailing NULL entry left. */
 	BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 46569b8e47dd..df036ab83920 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -509,33 +509,38 @@ int topology_cpu_init(struct cpu *cpu)
 	return rc;
 }
 
-static const struct cpumask *cpu_thread_mask(int cpu)
+static const struct cpumask *cpu_thread_mask(struct sched_domain_topology_level *tl, int cpu)
 {
 	return &cpu_topology[cpu].thread_mask;
 }
 
-const struct cpumask *cpu_coregroup_mask(int cpu)
+const struct cpumask *cpu_coregroup_mask(struct sched_domain_topology_level *tl, int cpu)
 {
 	return &cpu_topology[cpu].core_mask;
 }
 
-static const struct cpumask *cpu_book_mask(int cpu)
+static const struct cpumask *cpu_book_mask(struct sched_domain_topology_level *tl, int cpu)
 {
 	return &cpu_topology[cpu].book_mask;
 }
 
-static const struct cpumask *cpu_drawer_mask(int cpu)
+static const struct cpumask *cpu_drawer_mask(struct sched_domain_topology_level *tl, int cpu)
 {
 	return &cpu_topology[cpu].drawer_mask;
 }
 
+static const struct cpumask *cpu_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_node_mask(cpu);
+}
+
 static struct sched_domain_topology_level s390_topology[] = {
 	SDTL_INIT(cpu_thread_mask, cpu_smt_flags, SMT),
 	SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
 	SDTL_INIT(cpu_book_mask, NULL, BOOK),
 	SDTL_INIT(cpu_drawer_mask, NULL, DRAWER),
-	SDTL_INIT(cpu_cpu_mask, NULL, PKG),
+	SDTL_INIT(cpu_pkg_mask, NULL, PKG),
 	{ NULL, },
 };
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 33e166f6ab12..31a3b57314ef 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -471,6 +471,26 @@ static int x86_cluster_flags(void)
 }
 #endif
 
+static const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_smt_mask(cpu);
+}
+
+static const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_clustergroup_mask(cpu);
+}
+
+static const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_coregroup_mask(cpu);
+}
+
+static const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_node_mask(cpu);
+}
+
 /*
  * Set if a package/die has multiple NUMA nodes inside.
  * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
@@ -479,14 +499,14 @@ static int x86_cluster_flags(void)
 static bool x86_has_numa_in_package;
 
 static struct sched_domain_topology_level x86_topology[] = {
-	SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT),
+	SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
 #ifdef CONFIG_SCHED_CLUSTER
-	SDTL_INIT(cpu_clustergroup_mask, x86_cluster_flags, CLS),
+	SDTL_INIT(tl_cls_mask, x86_cluster_flags, CLS),
 #endif
 #ifdef CONFIG_SCHED_MC
-	SDTL_INIT(cpu_coregroup_mask, x86_core_flags, MC),
+	SDTL_INIT(tl_mc_mask, x86_core_flags, MC),
 #endif
-	SDTL_INIT(cpu_cpu_mask, x86_sched_itmt_flags, PKG),
+	SDTL_INIT(tl_pkg_mask, x86_sched_itmt_flags, PKG),
 	{ NULL },
 };
 
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 5263746b63e8..f0a53b0e67f5 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -172,7 +172,9 @@ bool cpus_equal_capacity(int this_cpu, int that_cpu);
 bool cpus_share_cache(int this_cpu, int that_cpu);
 bool cpus_share_resources(int this_cpu, int that_cpu);
 
-typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
+struct sched_domain_topology_level;
+
+typedef const struct cpumask *(*sched_domain_mask_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef int (*sched_domain_flags_f)(void);
 
 struct sd_data {
diff --git a/include/linux/topology.h b/include/linux/topology.h
index 33b7fda97d39..6575af39fd10 100644
--- a/include/linux/topology.h
+++ b/include/linux/topology.h
@@ -260,7 +260,7 @@ static inline bool topology_is_primary_thread(unsigned int cpu)
 
 #endif
 
-static inline const struct cpumask *cpu_cpu_mask(int cpu)
+static inline const struct cpumask *cpu_node_mask(int cpu)
 {
 	return cpumask_of_node(cpu_to_node(cpu));
 }
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 977e133bb8a4..8164ffabcd31 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1591,7 +1591,6 @@ static void claim_allocations(int cpu, struct sched_domain *sd)
 enum numa_topology_type sched_numa_topology_type;
 
 static int			sched_domains_numa_levels;
-static int			sched_domains_curr_level;
 
 int				sched_max_numa_distance;
 static int			*sched_domains_numa_distance;
@@ -1632,14 +1631,7 @@ sd_init(struct sched_domain_topology_level *tl,
 	int sd_id, sd_weight, sd_flags = 0;
 	struct cpumask *sd_span;
 
-#ifdef CONFIG_NUMA
-	/*
-	 * Ugly hack to pass state to sd_numa_mask()...
-	 */
-	sched_domains_curr_level = tl->numa_level;
-#endif
-
-	sd_weight = cpumask_weight(tl->mask(cpu));
+	sd_weight = cpumask_weight(tl->mask(tl, cpu));
 
 	if (tl->sd_flags)
 		sd_flags = (*tl->sd_flags)();
@@ -1677,7 +1669,7 @@ sd_init(struct sched_domain_topology_level *tl,
 	};
 
 	sd_span = sched_domain_span(sd);
-	cpumask_and(sd_span, cpu_map, tl->mask(cpu));
+	cpumask_and(sd_span, cpu_map, tl->mask(tl, cpu));
 	sd_id = cpumask_first(sd_span);
 
 	sd->flags |= asym_cpu_capacity_classify(sd_span, cpu_map);
@@ -1732,22 +1724,42 @@ sd_init(struct sched_domain_topology_level *tl,
 	return sd;
 }
 
+static const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_smt_mask(cpu);
+}
+
+static const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_clustergroup_mask(cpu);
+}
+
+static const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_coregroup_mask(cpu);
+}
+
+static const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_node_mask(cpu);
+}
+
 /*
  * Topology list, bottom-up.
  */
 static struct sched_domain_topology_level default_topology[] = {
 #ifdef CONFIG_SCHED_SMT
-	SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT),
+	SDTL_INIT(tl_smt_mask, cpu_smt_flags, SMT),
 #endif
 #ifdef CONFIG_SCHED_CLUSTER
-	SDTL_INIT(cpu_clustergroup_mask, cpu_cluster_flags, CLS),
+	SDTL_INIT(tl_cls_mask, cpu_cluster_flags, CLS),
 #endif
 #ifdef CONFIG_SCHED_MC
-	SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
+	SDTL_INIT(tl_mc_mask, cpu_core_flags, MC),
 #endif
-	SDTL_INIT(cpu_cpu_mask, NULL, PKG),
+	SDTL_INIT(tl_pkg_mask, NULL, PKG),
 	{ NULL, },
 };
 
@@ -1769,9 +1781,9 @@ void __init set_sched_topology(struct sched_domain_topology_level *tl)
 
 #ifdef CONFIG_NUMA
 
-static const struct cpumask *sd_numa_mask(int cpu)
+static const struct cpumask *sd_numa_mask(struct sched_domain_topology_level *tl, int cpu)
 {
-	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
+	return sched_domains_numa_masks[tl->numa_level][cpu_to_node(cpu)];
 }
 
 static void sched_numa_warn(const char *str)
@@ -2411,7 +2423,7 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
 		 * breaks the linking done for an earlier span.
 		 */
 		for_each_cpu(cpu, cpu_map) {
-			const struct cpumask *tl_cpu_mask = tl->mask(cpu);
+			const struct cpumask *tl_cpu_mask = tl->mask(tl, cpu);
 			int id;
 
 			/* lowest bit set in this mask is used as a unique id */
@@ -2419,7 +2431,7 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
 
 			if (cpumask_test_cpu(id, id_seen)) {
 				/* First CPU has already been seen, ensure identical spans */
-				if (!cpumask_equal(tl->mask(id), tl_cpu_mask))
+				if (!cpumask_equal(tl->mask(tl, id), tl_cpu_mask))
 					return false;
 			} else {
 				/* First CPU hasn't been seen before, ensure it's a completely new span */
Hello Peter,

On 8/25/2025 2:49 PM, Peter Zijlstra wrote:
> We should either set ->mask to NULL for NUMA thingies to make sure we
> don't end up using it again, or bite the bullet and fix up the mask
> function declaration.
>
> Something a little like so?

Both the QEMU VM from the above commit message and my 3rd Generation
EPYC are happy with the changes. One concern:

> diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
> index 977e133bb8a4..8164ffabcd31 100644
> --- a/kernel/sched/topology.c
> +++ b/kernel/sched/topology.c
> @@ -1732,22 +1724,42 @@ sd_init(struct sched_domain_topology_level *tl,
>  	return sd;
>  }
>
> +static const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
> +{
> +	return cpu_smt_mask(cpu);
> +}
> +
> +static const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
> +{
> +	return cpu_clustergroup_mask(cpu);
> +}
> +
> +static const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
> +{
> +	return cpu_coregroup_mask(cpu);
> +}
> +

The above helpers may need guarding behind CONFIG_SCHED_{SMT,CLUSTER,MC}
if I'm not mistaken. Possibility for some unification and cleanup with:
(Only build and boot tested on x86 on top of tip/master + your diff)

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 0b5897fff687..a0eeb6e39304 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1054,16 +1054,6 @@ static bool has_coregroup_support(void)
 	return coregroup_enabled;
 }
 
-static const struct cpumask *cpu_mc_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_coregroup_mask(cpu);
-}
-
-static const struct cpumask *cpu_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_node_mask(cpu);
-}
-
 static int __init init_big_cores(void)
 {
 	int cpu;
@@ -1718,10 +1708,10 @@ static void __init build_sched_topology(void)
 
 	if (has_coregroup_support()) {
 		powerpc_topology[i++] =
-			SDTL_INIT(cpu_mc_mask, powerpc_shared_proc_flags, MC);
+			SDTL_INIT(tl_mc_mask, powerpc_shared_proc_flags, MC);
 	}
 
-	powerpc_topology[i++] = SDTL_INIT(cpu_pkg_mask, powerpc_shared_proc_flags, PKG);
+	powerpc_topology[i++] = SDTL_INIT(tl_pkg_mask, powerpc_shared_proc_flags, PKG);
 
 	/* There must be one trailing NULL entry left. */
 	BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index df036ab83920..68d22cf3c604 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -530,17 +530,12 @@ static const struct cpumask *cpu_drawer_mask(struct sched_domain_topology_level
 	return &cpu_topology[cpu].drawer_mask;
 }
 
-static const struct cpumask *cpu_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_node_mask(cpu);
-}
-
 static struct sched_domain_topology_level s390_topology[] = {
 	SDTL_INIT(cpu_thread_mask, cpu_smt_flags, SMT),
 	SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
 	SDTL_INIT(cpu_book_mask, NULL, BOOK),
 	SDTL_INIT(cpu_drawer_mask, NULL, DRAWER),
-	SDTL_INIT(cpu_pkg_mask, NULL, PKG),
+	SDTL_INIT(tl_pkg_mask, NULL, PKG),
 	{ NULL, },
 };
 
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 31a3b57314ef..eb289abece23 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -471,26 +471,6 @@ static int x86_cluster_flags(void)
 }
 #endif
 
-static const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_smt_mask(cpu);
-}
-
-static const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_clustergroup_mask(cpu);
-}
-
-static const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_coregroup_mask(cpu);
-}
-
-static const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_node_mask(cpu);
-}
-
 /*
  * Set if a package/die has multiple NUMA nodes inside.
  * AMD Magny-Cours, Intel Cluster-on-Die, and Intel
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index f0a53b0e67f5..c7457ccf05c4 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -30,11 +30,19 @@ struct sd_flag_debug {
 };
 extern const struct sd_flag_debug sd_flag_debug[];
 
+struct sched_domain_topology_level;
+
 #ifdef CONFIG_SCHED_SMT
 static inline int cpu_smt_flags(void)
 {
 	return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
 }
+
+static const __maybe_unused
+struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_smt_mask(cpu);
+}
 #endif
 
 #ifdef CONFIG_SCHED_CLUSTER
@@ -42,6 +50,12 @@ static inline int cpu_cluster_flags(void)
 {
 	return SD_CLUSTER | SD_SHARE_LLC;
 }
+
+static const __maybe_unused
+struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_clustergroup_mask(cpu);
+}
 #endif
 
 #ifdef CONFIG_SCHED_MC
@@ -49,8 +63,20 @@ static inline int cpu_core_flags(void)
 {
 	return SD_SHARE_LLC;
 }
+
+static const __maybe_unused
+struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_coregroup_mask(cpu);
+}
 #endif
 
+static const __maybe_unused
+struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
+{
+	return cpu_node_mask(cpu);
+}
+
 #ifdef CONFIG_NUMA
 static inline int cpu_numa_flags(void)
 {
@@ -172,8 +198,6 @@ bool cpus_equal_capacity(int this_cpu, int that_cpu);
 bool cpus_share_cache(int this_cpu, int that_cpu);
 bool cpus_share_resources(int this_cpu, int that_cpu);
 
-struct sched_domain_topology_level;
-
 typedef const struct cpumask *(*sched_domain_mask_f)(struct sched_domain_topology_level *tl, int cpu);
 typedef int (*sched_domain_flags_f)(void);
 
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8164ffabcd31..18889bd97e22 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1724,26 +1724,6 @@ sd_init(struct sched_domain_topology_level *tl,
 	return sd;
 }
 
-static const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_smt_mask(cpu);
-}
-
-static const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_clustergroup_mask(cpu);
-}
-
-static const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_coregroup_mask(cpu);
-}
-
-static const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cpu)
-{
-	return cpu_node_mask(cpu);
-}
-
 /*
  * Topology list, bottom-up.
  */
--

Thoughts?

--
Thanks and Regards,
Prateek
On Mon, Aug 25, 2025 at 04:17:52PM +0530, K Prateek Nayak wrote:
> The above helpers may need guarding behind CONFIG_SCHED_{SMT,CLUSTER,MC}
> if I'm not mistaken. Possibility for some unification and cleanup with:
>
> Thoughts?

I hate we need __maybe_unused on static inline functions, but yeah, that
might be nicer.

Can you fold the lot and stick that nice Changelog on? Then I'll get it
into sched/core and we can forget all about this stuff.
On 8/25/2025 4:46 PM, Peter Zijlstra wrote:
> On Mon, Aug 25, 2025 at 04:17:52PM +0530, K Prateek Nayak wrote:
>
>> The above helpers may need guarding behind CONFIG_SCHED_{SMT,CLUSTER,MC}
>> if I'm not mistaken. Possibility for some unification and cleanup with:
>>
>> Thoughts?
>
> I hate we need __maybe_unused on static inline functions, but yeah, that
> might be nicer.

I couldn't find a better solution unfortunately :(

> Can you fold the lot and stick that nice Changelog on? Then I'll get it
> into sched/core and we can forget all about this stuff.

Okey dokey! Posted the changes as v6 at:
https://lore.kernel.org/lkml/20250825120244.11093-1-kprateek.nayak@amd.com/

--
Thanks and Regards,
Prateek