From: Li Chen <chenl311@chinatelecom.cn>
Define a small SDTL_INIT(maskfn, flagsfn, name) macro and use it to build the
sched_domain_topology_level array. Purely a cleanup; behaviour is unchanged.
Signed-off-by: Li Chen <chenl311@chinatelecom.cn>
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
arch/powerpc/kernel/smp.c | 34 +++++++++++++---------------------
arch/s390/kernel/topology.c | 10 +++++-----
arch/x86/kernel/smpboot.c | 21 ++++++---------------
include/linux/sched/topology.h | 4 ++--
kernel/sched/topology.c | 24 ++++++++----------------
5 files changed, 34 insertions(+), 59 deletions(-)
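[ Illustration only, not part of the patch: a standalone sketch, using
  simplified stand-in types, of what an SDTL_INIT() invocation expands
  to. The macro body is copied from the hunk below; everything else is
  scaffolding so the snippet compiles outside the kernel. ]

#include <stdio.h>

/* Simplified stand-ins for the kernel's callback typedefs. */
typedef const void *(*sched_domain_mask_f)(int cpu);
typedef int (*sched_domain_flags_f)(void);

struct sched_domain_topology_level {
	sched_domain_mask_f	mask;
	sched_domain_flags_f	sd_flags;
	char			*name;
};

/* The macro introduced by this patch, verbatim. */
#define SDTL_INIT(maskfn, flagsfn, dname) ((struct sched_domain_topology_level) \
	{ .mask = maskfn, .sd_flags = flagsfn, .name = #dname })

static const void *cpu_smt_mask(int cpu) { return NULL; }	/* stub */
static int cpu_smt_flags(void) { return 0; }			/* stub */

int main(void)
{
	/* Same compound literal the open-coded sites built by hand. */
	struct sched_domain_topology_level tl =
		SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT);

	/*
	 * .name comes from stringizing the third argument (#dname),
	 * exactly what the old SD_INIT_NAME(type) helper produced.
	 */
	printf("%s\n", tl.name);	/* prints "SMT" */
	return 0;
}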
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5ac7084eebc0b..0b7ab7d2eb142 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1700,28 +1700,20 @@ static void __init build_sched_topology(void)
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
- };
- } else {
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
- };
- }
+ powerpc_topology[i++] =
+ SDTL_INIT(smallcore_smt_mask, powerpc_smt_flags, SMT);
+ } else
+ powerpc_topology[i++] = SDTL_INIT(cpu_smt_mask, powerpc_smt_flags, SMT);
#endif
- if (shared_caches) {
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE)
- };
- }
- if (has_coregroup_support()) {
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC)
- };
- }
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG)
- };
+ if (shared_caches)
+ powerpc_topology[i++] =
+ SDTL_INIT(shared_cache_mask, powerpc_shared_cache_flags, CACHE);
+
+ if (has_coregroup_support())
+ powerpc_topology[i++] =
+ SDTL_INIT(cpu_mc_mask, powerpc_shared_proc_flags, MC);
+
+ powerpc_topology[i++] = SDTL_INIT(cpu_cpu_mask, powerpc_shared_proc_flags, PKG);
/* There must be one trailing NULL entry left. */
BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 3df048e190b11..46569b8e47dde 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -531,11 +531,11 @@ static const struct cpumask *cpu_drawer_mask(int cpu)
}
static struct sched_domain_topology_level s390_topology[] = {
- { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
- { cpu_book_mask, SD_INIT_NAME(BOOK) },
- { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
- { cpu_cpu_mask, SD_INIT_NAME(PKG) },
+ SDTL_INIT(cpu_thread_mask, cpu_smt_flags, SMT),
+ SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
+ SDTL_INIT(cpu_book_mask, NULL, BOOK),
+ SDTL_INIT(cpu_drawer_mask, NULL, DRAWER),
+ SDTL_INIT(cpu_cpu_mask, NULL, PKG),
{ NULL, },
};
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 58ede3fa6a75b..445127df2cb19 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -485,35 +485,26 @@ static void __init build_sched_topology(void)
int i = 0;
#ifdef CONFIG_SCHED_SMT
- x86_topology[i++] = (struct sched_domain_topology_level){
- cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT)
- };
+ x86_topology[i++] = SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT);
#endif
#ifdef CONFIG_SCHED_CLUSTER
- x86_topology[i++] = (struct sched_domain_topology_level){
- cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS)
- };
+ x86_topology[i++] = SDTL_INIT(cpu_clustergroup_mask, x86_cluster_flags, CLS);
#endif
#ifdef CONFIG_SCHED_MC
- x86_topology[i++] = (struct sched_domain_topology_level){
- cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC)
- };
+ x86_topology[i++] = SDTL_INIT(cpu_coregroup_mask, x86_core_flags, MC);
#endif
/*
* When there is NUMA topology inside the package skip the PKG domain
* since the NUMA domains will auto-magically create the right spanning
* domains based on the SLIT.
*/
- if (!x86_has_numa_in_package) {
- x86_topology[i++] = (struct sched_domain_topology_level){
- cpu_cpu_mask, x86_sched_itmt_flags, SD_INIT_NAME(PKG)
- };
- }
+ if (!x86_has_numa_in_package)
+ x86_topology[i++] = SDTL_INIT(cpu_cpu_mask, x86_sched_itmt_flags, PKG);
/*
* There must be one trailing NULL entry left.
*/
- BUG_ON(i >= ARRAY_SIZE(x86_topology)-1);
+ BUG_ON(i >= ARRAY_SIZE(x86_topology) - 1);
set_sched_topology(x86_topology);
}
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index e54e7fa76ba63..0d5daaa277b75 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -196,8 +196,8 @@ struct sched_domain_topology_level {
extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio);
-
-# define SD_INIT_NAME(type) .name = #type
+#define SDTL_INIT(maskfn, flagsfn, dname) ((struct sched_domain_topology_level) \
+ { .mask = maskfn, .sd_flags = flagsfn, .name = #dname })
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
extern void rebuild_sched_domains_energy(void);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8e06b1d22e91e..d01f5a49f2e7a 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1737,17 +1737,17 @@ sd_init(struct sched_domain_topology_level *tl,
*/
static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT),
#endif
#ifdef CONFIG_SCHED_CLUSTER
- { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
+ SDTL_INIT(cpu_clustergroup_mask, cpu_cluster_flags, CLS),
#endif
#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
#endif
- { cpu_cpu_mask, SD_INIT_NAME(PKG) },
+ SDTL_INIT(cpu_cpu_mask, NULL, PKG),
{ NULL, },
};
@@ -2008,23 +2008,15 @@ void sched_init_numa(int offline_node)
/*
* Add the NUMA identity distance, aka single NODE.
*/
- tl[i++] = (struct sched_domain_topology_level){
- .mask = sd_numa_mask,
- .numa_level = 0,
- SD_INIT_NAME(NODE)
- };
+ tl[i++] = SDTL_INIT(sd_numa_mask, NULL, NODE);
/*
* .. and append 'j' levels of NUMA goodness.
*/
for (j = 1; j < nr_levels; i++, j++) {
- tl[i] = (struct sched_domain_topology_level){
- .mask = sd_numa_mask,
- .sd_flags = cpu_numa_flags,
- .flags = SDTL_OVERLAP,
- .numa_level = j,
- SD_INIT_NAME(NUMA)
- };
+ tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
+ tl[i].numa_level = j;
+ tl[i].flags = SDTL_OVERLAP;
}
sched_domain_topology_saved = sched_domain_topology;
--
2.50.0
On Thu, Jul 10, 2025 at 06:57:07PM +0800, Li Chen wrote:
> From: Li Chen <chenl311@chinatelecom.cn>
>
> Define a small SDTL_INIT(maskfn, flagsfn, name) macro and use it to build the
> sched_domain_topology_level array. Purely a cleanup; behaviour is unchanged.
>
> Signed-off-by: Li Chen <chenl311@chinatelecom.cn>
> Suggested-by: Thomas Gleixner <tglx@linutronix.de>
> Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
> ---
[...]
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 5ac7084eebc0b..0b7ab7d2eb142 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -1700,28 +1700,20 @@ static void __init build_sched_topology(void)
>  #ifdef CONFIG_SCHED_SMT
>  	if (has_big_cores) {
>  		pr_info("Big cores detected but using small core scheduling\n");
> -		powerpc_topology[i++] = (struct sched_domain_topology_level){
> -			smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
> -		};
> -	} else {
> -		powerpc_topology[i++] = (struct sched_domain_topology_level){
> -			cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
> -		};
> -	}
> +		powerpc_topology[i++] =
> +			SDTL_INIT(smallcore_smt_mask, powerpc_smt_flags, SMT);
> +	} else
> +		powerpc_topology[i++] = SDTL_INIT(cpu_smt_mask, powerpc_smt_flags, SMT);
>  #endif
> -	if (shared_caches) {
> -		powerpc_topology[i++] = (struct sched_domain_topology_level){
> -			shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE)
> -		};
> -	}
> -	if (has_coregroup_support()) {
> -		powerpc_topology[i++] = (struct sched_domain_topology_level){
> -			cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC)
> -		};
> -	}
> -	powerpc_topology[i++] = (struct sched_domain_topology_level){
> -		cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG)
> -	};
> +	if (shared_caches)
> +		powerpc_topology[i++] =
> +			SDTL_INIT(shared_cache_mask, powerpc_shared_cache_flags, CACHE);
> +
> +	if (has_coregroup_support())
> +		powerpc_topology[i++] =
> +			SDTL_INIT(cpu_mc_mask, powerpc_shared_proc_flags, MC);
> +
> +	powerpc_topology[i++] = SDTL_INIT(cpu_cpu_mask, powerpc_shared_proc_flags, PKG);

You wrecked coding-style here and lost a bunch of curlies. I've fixed
that up for you.
On 7/10/2025 4:27 PM, Li Chen wrote:
> 	/*
> 	 * .. and append 'j' levels of NUMA goodness.
> 	 */
> 	for (j = 1; j < nr_levels; i++, j++) {
> -		tl[i] = (struct sched_domain_topology_level){
> -			.mask = sd_numa_mask,
> -			.sd_flags = cpu_numa_flags,
> -			.flags = SDTL_OVERLAP,
> -			.numa_level = j,
> -			SD_INIT_NAME(NUMA)
> -		};
> +		tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
> +		tl[i].numa_level = j;
> +		tl[i].flags = SDTL_OVERLAP;

Tangential discussion: I was looking at this and was wondering why we
need a "tl->flags" when there is already an sd_flags() function and we
can simply add SD_OVERLAP to sd_numa_flags().

I think "tl->flags" was needed when the idea of overlap domains was
added in commit e3589f6c81e4 ("sched: Allow for overlapping sched_domain
spans"), when it depended on the "FORCE_SD_OVERLAP" sched_feat() which
allowed toggling this off, but that was done away with in commit
af85596c74de ("sched/topology: Remove FORCE_SD_OVERLAP") so perhaps we
can get rid of it now?

Relying on SD_NUMA should be enough currently. Peter, Valentin, what do
you think of something like below? (Build and boot tested on top of this
series on tip:sched/core)

diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index b04a5d04dee9..42839cfa2778 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -153,14 +153,6 @@ SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
  */
 SD_FLAG(SD_PREFER_SIBLING, SDF_NEEDS_GROUPS)
 
-/*
- * sched_groups of this level overlap
- *
- * SHARED_PARENT: Set for all NUMA levels above NODE.
- * NEEDS_GROUPS: Overlaps can only exist with more than one group.
- */
-SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
-
 /*
  * Cross-node balancing
  *
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 0d5daaa277b7..5263746b63e8 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -175,8 +175,6 @@ bool cpus_share_resources(int this_cpu, int that_cpu);
 typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
 typedef int (*sched_domain_flags_f)(void);
 
-#define SDTL_OVERLAP	0x01
-
 struct sd_data {
 	struct sched_domain *__percpu *sd;
 	struct sched_domain_shared *__percpu *sds;
@@ -187,7 +185,6 @@ struct sd_data {
 struct sched_domain_topology_level {
 	sched_domain_mask_f mask;
 	sched_domain_flags_f sd_flags;
-	int		    flags;
 	int		    numa_level;
 	struct sd_data      data;
 	char                *name;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 20a845697c1d..b9b4bbbf0af6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9926,9 +9926,9 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 	min_capacity = ULONG_MAX;
 	max_capacity = 0;
 
-	if (child->flags & SD_OVERLAP) {
+	if (child->flags & SD_NUMA) {
 		/*
-		 * SD_OVERLAP domains cannot assume that child groups
+		 * SD_NUMA domains cannot assume that child groups
 		 * span the current group.
 		 */
 
@@ -9941,7 +9941,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
 		}
 	} else {
 		/*
-		 * !SD_OVERLAP domains can assume that child groups
+		 * !SD_NUMA domains can assume that child groups
 		 * span the current group.
 		 */
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d01f5a49f2e7..977e133bb8a4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -89,7 +89,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 			break;
 		}
 
-		if (!(sd->flags & SD_OVERLAP) &&
+		if (!(sd->flags & SD_NUMA) &&
 		    cpumask_intersects(groupmask, sched_group_span(group))) {
 			printk(KERN_CONT "\n");
 			printk(KERN_ERR "ERROR: repeated CPUs\n");
@@ -102,7 +102,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
 				group->sgc->id,
 				cpumask_pr_args(sched_group_span(group)));
 
-		if ((sd->flags & SD_OVERLAP) &&
+		if ((sd->flags & SD_NUMA) &&
 		    !cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
 			printk(KERN_CONT " mask=%*pbl",
 				cpumask_pr_args(group_balance_mask(group)));
@@ -1344,7 +1344,7 @@ void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
 		 * "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu"
 		 * which is shared by all the overlapping groups.
 		 */
-		WARN_ON_ONCE(sd->flags & SD_OVERLAP);
+		WARN_ON_ONCE(sd->flags & SD_NUMA);
 
 		sg = sd->groups;
 		if (cpu != sg->asym_prefer_cpu) {
@@ -2016,7 +2016,6 @@ void sched_init_numa(int offline_node)
 	for (j = 1; j < nr_levels; i++, j++) {
 		tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
 		tl[i].numa_level = j;
-		tl[i].flags = SDTL_OVERLAP;
 	}
 
 	sched_domain_topology_saved = sched_domain_topology;
@@ -2327,7 +2326,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
 
 			if (sdd->sd) {
 				sd = *per_cpu_ptr(sdd->sd, j);
-				if (sd && (sd->flags & SD_OVERLAP))
+				if (sd && (sd->flags & SD_NUMA))
 					free_sched_groups(sd->groups, 0);
 				kfree(*per_cpu_ptr(sdd->sd, j));
 			}
@@ -2393,9 +2392,13 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
 	id_seen = sched_domains_tmpmask2;
 
 	for_each_sd_topology(tl) {
+		int tl_common_flags = 0;
+
+		if (tl->sd_flags)
+			tl_common_flags = (*tl->sd_flags)();
 
 		/* NUMA levels are allowed to overlap */
-		if (tl->flags & SDTL_OVERLAP)
+		if (tl_common_flags & SD_NUMA)
 			continue;
 
 		cpumask_clear(covered);
@@ -2466,8 +2469,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 
 		if (tl == sched_domain_topology)
 			*per_cpu_ptr(d.sd, i) = sd;
-		if (tl->flags & SDTL_OVERLAP)
-			sd->flags |= SD_OVERLAP;
 		if (cpumask_equal(cpu_map, sched_domain_span(sd)))
 			break;
 	}
@@ -2480,7 +2481,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
 	for_each_cpu(i, cpu_map) {
 		for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
 			sd->span_weight = cpumask_weight(sched_domain_span(sd));
-			if (sd->flags & SD_OVERLAP) {
+			if (sd->flags & SD_NUMA) {
 				if (build_overlap_sched_groups(sd, i))
 					goto error;
 			} else {
---

We can also keep SD_OVERLAP and only remove SDTL_OVERLAP and tl->flags
if that is preferred, or just keep them both if you see future !NUMA
use cases for overlapping domains.

> 	}
> 
> 	sched_domain_topology_saved = sched_domain_topology;

--
Thanks and Regards,
Prateek
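[ Illustration only: a standalone sketch, with stub types and a
  stand-in SD_NUMA value rather than the real kernel definitions, of
  the detection scheme the topology_span_sane() hunk above switches
  to -- a level counts as overlapping iff its sd_flags() callback
  reports SD_NUMA, with a NULL callback meaning "no flags". ]

#include <stdio.h>

#define SD_NUMA 0x0001	/* stand-in bit; the real value lives in sd_flags.h */

typedef int (*sched_domain_flags_f)(void);

struct sched_domain_topology_level {
	sched_domain_flags_f sd_flags;	/* may be NULL, as for BOOK/DRAWER/PKG */
	char *name;
};

static int cpu_numa_flags(void) { return SD_NUMA; }	/* NUMA levels report SD_NUMA */
static int cpu_smt_flags(void)  { return 0; }		/* stub: no SD_NUMA here */

/* Mirrors the new check: query the callback instead of tl->flags. */
static int tl_overlapping(const struct sched_domain_topology_level *tl)
{
	int tl_common_flags = tl->sd_flags ? tl->sd_flags() : 0;

	/* NUMA levels are allowed to overlap */
	return tl_common_flags & SD_NUMA;
}

int main(void)
{
	struct sched_domain_topology_level smt  = { cpu_smt_flags,  "SMT"  };
	struct sched_domain_topology_level numa = { cpu_numa_flags, "NUMA" };

	printf("%s overlapping: %d\n", smt.name,  tl_overlapping(&smt));	/* 0 */
	printf("%s overlapping: %d\n", numa.name, tl_overlapping(&numa));	/* 1 */
	return 0;
}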
On 11/07/25 11:20, K Prateek Nayak wrote:
> Tangential discussion: I was looking at this and was wondering why we
> need a "tl->flags" when there is already an sd_flags() function and we
> can simply add SD_OVERLAP to sd_numa_flags().
>
> I think "tl->flags" was needed when the idea of overlap domains was
> added in commit e3589f6c81e4 ("sched: Allow for overlapping sched_domain
> spans") when it depended on "FORCE_SD_OVERLAP" sched_feat() which
> allowed toggling this off but that was done away with in commit
> af85596c74de ("sched/topology: Remove FORCE_SD_OVERLAP") so perhaps we
> can get rid of it now?

I remember asking myself the same question when I mucked about with the
SD flags; I ended up convincing myself to let it be, but I couldn't find
any note as to why.

Looking at things in their current state, I agree with you, we could
just bin it.
On Fri, Jul 11, 2025 at 11:20:30AM +0530, K Prateek Nayak wrote:
> On 7/10/2025 4:27 PM, Li Chen wrote:
[...]
> Tangential discussion: I was looking at this and was wondering why we
> need a "tl->flags" when there is already an sd_flags() function and we
> can simply add SD_OVERLAP to sd_numa_flags().
>
> I think "tl->flags" was needed when the idea of overlap domains was
> added in commit e3589f6c81e4 ("sched: Allow for overlapping sched_domain
> spans") when it depended on "FORCE_SD_OVERLAP" sched_feat() which
> allowed toggling this off but that was done away with in commit
> af85596c74de ("sched/topology: Remove FORCE_SD_OVERLAP") so perhaps we
> can get rid of it now?
>
> Relying on SD_NUMA should be enough currently. Peter, Valentin, what do
> you think of something like below?

I think you're right. SD_NUMA appears to be the one and only case that
also has SDTL_OVERLAP which then results in setting SD_OVERLAP, making
SD_NUMA and SD_OVERLAP equivalent and SDTL_OVERLAP redundant.

I'll presume you're okay with me adding your SoB to things, and I'll
push out all 5 patches to queue/sched/core to let the robots have a go
at things.
(trimming the cc to only kernel/sched folks to reduce the noise)

On 7/11/2025 6:36 PM, Peter Zijlstra wrote:
> I think you're right. SD_NUMA appears to be the one and only case that
> also has SDTL_OVERLAP which then results in setting SD_OVERLAP, making
> SD_NUMA and SD_OVERLAP equivalent and SDTL_OVERLAP redundant.
>
> I'll presume you're okay with me adding your SoB to things, and I'll
> push out all 5 patches to queue/sched/core to let the robots have a go
> at things.

Works for me! If you need a formal commit message:

Support for overlapping domains added in commit e3589f6c81e4 ("sched:
Allow for overlapping sched_domain spans") also allowed forcefully
setting SD_OVERLAP for !NUMA domains via FORCE_SD_OVERLAP sched_feat().

Since NUMA domains had to be presumed overlapping to ensure correct
behavior, "sched_domain_topology_level::flags" was introduced. NUMA
domains set the SDTL_OVERLAP flag there, which ensured SD_OVERLAP was
always added during build_sched_domains() for these domains, even when
FORCE_SD_OVERLAP was off.

The condition for adding the SD_OVERLAP flag in the aforementioned
commit was as follows:

	if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
		sd->flags |= SD_OVERLAP;

The FORCE_SD_OVERLAP debug feature was removed in commit af85596c74de
("sched/topology: Remove FORCE_SD_OVERLAP") which left the NUMA domains
as the exclusive users of SDTL_OVERLAP, SD_OVERLAP, and SD_NUMA flags.

Get rid of SDTL_OVERLAP and SD_OVERLAP as they have become redundant
and instead rely on SD_NUMA to detect the only overlapping domain
currently supported. Since SDTL_OVERLAP was the only user of
"tl->flags", get rid of "sched_domain_topology_level::flags" too.

Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---

P.S. Are we still considering the following for the v6.16 cycle?
https://lore.kernel.org/lkml/20250709161917.14298-1-kprateek.nayak@amd.com/

If not, I can rebase it on top of queue:sched/core and send it out with
the conflicts resolved to save you a couple of edits :)

--
Thanks and Regards,
Prateek
On Mon, Jul 14, 2025 at 09:33:42AM +0530, K Prateek Nayak wrote:
> Works for me! If you need a formal commit message:

Thanks, much better than the badly edited thing I put in place.

> P.S. Are we still considering the following for the v6.16 cycle?
> https://lore.kernel.org/lkml/20250709161917.14298-1-kprateek.nayak@amd.com/
>
> If not, I can rebase it on top of queue:sched/core and send it out with
> the conflicts resolved to save you a couple of edits :)

Already done, about to push out.
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 1eec89a671413ce38df9fe9e70f5130a9eb79a59
Gitweb: https://git.kernel.org/tip/1eec89a671413ce38df9fe9e70f5130a9eb79a59
Author: K Prateek Nayak <kprateek.nayak@amd.com>
AuthorDate: Fri, 11 Jul 2025 11:20:30 +05:30
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 14 Jul 2025 10:59:35 +02:00
sched/topology: Remove sched_domain_topology_level::flags
Support for overlapping domains added in commit e3589f6c81e4 ("sched:
Allow for overlapping sched_domain spans") also allowed forcefully
setting SD_OVERLAP for !NUMA domains via FORCE_SD_OVERLAP sched_feat().
Since NUMA domains had to be presumed overlapping to ensure correct
behavior, "sched_domain_topology_level::flags" was introduced. NUMA
domains added the SDTL_OVERLAP flag would ensure SD_OVERLAP was always
added during build_sched_domains() for these domains, even when
FORCE_SD_OVERLAP was off.
Condition for adding the SD_OVERLAP flag at the aforementioned commit
was as follows:
if (tl->flags & SDTL_OVERLAP || sched_feat(FORCE_SD_OVERLAP))
sd->flags |= SD_OVERLAP;
The FORCE_SD_OVERLAP debug feature was removed in commit af85596c74de
("sched/topology: Remove FORCE_SD_OVERLAP") which left the NUMA domains
as the exclusive users of SDTL_OVERLAP, SD_OVERLAP, and SD_NUMA flags.
Get rid of SDTL_OVERLAP and SD_OVERLAP as they have become redundant
and instead rely on SD_NUMA to detect the only overlapping domain
currently supported. Since SDTL_OVERLAP was the only user of
"tl->flags", get rid of "sched_domain_topology_level::flags" too.
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/ba4dbdf8-bc37-493d-b2e0-2efb00ea3e19@amd.com
---
include/linux/sched/sd_flags.h | 8 --------
include/linux/sched/topology.h | 3 ---
kernel/sched/fair.c | 6 +++---
kernel/sched/topology.c | 19 ++++++++++---------
4 files changed, 13 insertions(+), 23 deletions(-)
diff --git a/include/linux/sched/sd_flags.h b/include/linux/sched/sd_flags.h
index b04a5d0..42839cf 100644
--- a/include/linux/sched/sd_flags.h
+++ b/include/linux/sched/sd_flags.h
@@ -154,14 +154,6 @@ SD_FLAG(SD_ASYM_PACKING, SDF_NEEDS_GROUPS)
SD_FLAG(SD_PREFER_SIBLING, SDF_NEEDS_GROUPS)
/*
- * sched_groups of this level overlap
- *
- * SHARED_PARENT: Set for all NUMA levels above NODE.
- * NEEDS_GROUPS: Overlaps can only exist with more than one group.
- */
-SD_FLAG(SD_OVERLAP, SDF_SHARED_PARENT | SDF_NEEDS_GROUPS)
-
-/*
* Cross-node balancing
*
* SHARED_PARENT: Set for all NUMA levels above NODE.
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 0d5daaa..5263746 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -175,8 +175,6 @@ bool cpus_share_resources(int this_cpu, int that_cpu);
typedef const struct cpumask *(*sched_domain_mask_f)(int cpu);
typedef int (*sched_domain_flags_f)(void);
-#define SDTL_OVERLAP 0x01
-
struct sd_data {
struct sched_domain *__percpu *sd;
struct sched_domain_shared *__percpu *sds;
@@ -187,7 +185,6 @@ struct sd_data {
struct sched_domain_topology_level {
sched_domain_mask_f mask;
sched_domain_flags_f sd_flags;
- int flags;
int numa_level;
struct sd_data data;
char *name;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 20a8456..b9b4bbb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9926,9 +9926,9 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
min_capacity = ULONG_MAX;
max_capacity = 0;
- if (child->flags & SD_OVERLAP) {
+ if (child->flags & SD_NUMA) {
/*
- * SD_OVERLAP domains cannot assume that child groups
+ * SD_NUMA domains cannot assume that child groups
* span the current group.
*/
@@ -9941,7 +9941,7 @@ void update_group_capacity(struct sched_domain *sd, int cpu)
}
} else {
/*
- * !SD_OVERLAP domains can assume that child groups
+ * !SD_NUMA domains can assume that child groups
* span the current group.
*/
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index d01f5a4..977e133 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -89,7 +89,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
break;
}
- if (!(sd->flags & SD_OVERLAP) &&
+ if (!(sd->flags & SD_NUMA) &&
cpumask_intersects(groupmask, sched_group_span(group))) {
printk(KERN_CONT "\n");
printk(KERN_ERR "ERROR: repeated CPUs\n");
@@ -102,7 +102,7 @@ static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
group->sgc->id,
cpumask_pr_args(sched_group_span(group)));
- if ((sd->flags & SD_OVERLAP) &&
+ if ((sd->flags & SD_NUMA) &&
!cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
printk(KERN_CONT " mask=%*pbl",
cpumask_pr_args(group_balance_mask(group)));
@@ -1344,7 +1344,7 @@ void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio)
* "sg->asym_prefer_cpu" to "sg->sgc->asym_prefer_cpu"
* which is shared by all the overlapping groups.
*/
- WARN_ON_ONCE(sd->flags & SD_OVERLAP);
+ WARN_ON_ONCE(sd->flags & SD_NUMA);
sg = sd->groups;
if (cpu != sg->asym_prefer_cpu) {
@@ -2016,7 +2016,6 @@ void sched_init_numa(int offline_node)
for (j = 1; j < nr_levels; i++, j++) {
tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
tl[i].numa_level = j;
- tl[i].flags = SDTL_OVERLAP;
}
sched_domain_topology_saved = sched_domain_topology;
@@ -2327,7 +2326,7 @@ static void __sdt_free(const struct cpumask *cpu_map)
if (sdd->sd) {
sd = *per_cpu_ptr(sdd->sd, j);
- if (sd && (sd->flags & SD_OVERLAP))
+ if (sd && (sd->flags & SD_NUMA))
free_sched_groups(sd->groups, 0);
kfree(*per_cpu_ptr(sdd->sd, j));
}
@@ -2393,9 +2392,13 @@ static bool topology_span_sane(const struct cpumask *cpu_map)
id_seen = sched_domains_tmpmask2;
for_each_sd_topology(tl) {
+ int tl_common_flags = 0;
+
+ if (tl->sd_flags)
+ tl_common_flags = (*tl->sd_flags)();
/* NUMA levels are allowed to overlap */
- if (tl->flags & SDTL_OVERLAP)
+ if (tl_common_flags & SD_NUMA)
continue;
cpumask_clear(covered);
@@ -2466,8 +2469,6 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
if (tl == sched_domain_topology)
*per_cpu_ptr(d.sd, i) = sd;
- if (tl->flags & SDTL_OVERLAP)
- sd->flags |= SD_OVERLAP;
if (cpumask_equal(cpu_map, sched_domain_span(sd)))
break;
}
@@ -2480,7 +2481,7 @@ build_sched_domains(const struct cpumask *cpu_map, struct sched_domain_attr *att
for_each_cpu(i, cpu_map) {
for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
sd->span_weight = cpumask_weight(sched_domain_span(sd));
- if (sd->flags & SD_OVERLAP) {
+ if (sd->flags & SD_NUMA) {
if (build_overlap_sched_groups(sd, i))
goto error;
} else {
The following commit has been merged into the sched/core branch of tip:
Commit-ID: e075f4360931263f5ec006ea5dadc065e5e98eb8
Gitweb: https://git.kernel.org/tip/e075f4360931263f5ec006ea5dadc065e5e98eb8
Author: Li Chen <chenl311@chinatelecom.cn>
AuthorDate: Thu, 10 Jul 2025 18:57:07 +08:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 14 Jul 2025 10:59:34 +02:00
smpboot: introduce SDTL_INIT() helper to tidy sched topology setup
Define a small SDTL_INIT(maskfn, flagsfn, name) macro and use it to build the
sched_domain_topology_level array. Purely a cleanup; behaviour is unchanged.
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Li Chen <chenl311@chinatelecom.cn>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://lore.kernel.org/r/20250710105715.66594-2-me@linux.beauty
---
arch/powerpc/kernel/smp.c | 25 ++++++++++---------------
arch/s390/kernel/topology.c | 10 +++++-----
arch/x86/kernel/smpboot.c | 21 ++++++---------------
include/linux/sched/topology.h | 4 ++--
kernel/sched/topology.c | 24 ++++++++----------------
5 files changed, 31 insertions(+), 53 deletions(-)
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 5ac7084..f59e4b9 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -1700,28 +1700,23 @@ static void __init build_sched_topology(void)
#ifdef CONFIG_SCHED_SMT
if (has_big_cores) {
pr_info("Big cores detected but using small core scheduling\n");
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- smallcore_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
- };
+ powerpc_topology[i++] =
+ SDTL_INIT(smallcore_smt_mask, powerpc_smt_flags, SMT);
} else {
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT)
- };
+ powerpc_topology[i++] = SDTL_INIT(cpu_smt_mask, powerpc_smt_flags, SMT);
}
#endif
if (shared_caches) {
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- shared_cache_mask, powerpc_shared_cache_flags, SD_INIT_NAME(CACHE)
- };
+ powerpc_topology[i++] =
+ SDTL_INIT(shared_cache_mask, powerpc_shared_cache_flags, CACHE);
}
+
if (has_coregroup_support()) {
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- cpu_mc_mask, powerpc_shared_proc_flags, SD_INIT_NAME(MC)
- };
+ powerpc_topology[i++] =
+ SDTL_INIT(cpu_mc_mask, powerpc_shared_proc_flags, MC);
}
- powerpc_topology[i++] = (struct sched_domain_topology_level){
- cpu_cpu_mask, powerpc_shared_proc_flags, SD_INIT_NAME(PKG)
- };
+
+ powerpc_topology[i++] = SDTL_INIT(cpu_cpu_mask, powerpc_shared_proc_flags, PKG);
/* There must be one trailing NULL entry left. */
BUG_ON(i >= ARRAY_SIZE(powerpc_topology) - 1);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 3df048e..46569b8 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -531,11 +531,11 @@ static const struct cpumask *cpu_drawer_mask(int cpu)
}
static struct sched_domain_topology_level s390_topology[] = {
- { cpu_thread_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
- { cpu_book_mask, SD_INIT_NAME(BOOK) },
- { cpu_drawer_mask, SD_INIT_NAME(DRAWER) },
- { cpu_cpu_mask, SD_INIT_NAME(PKG) },
+ SDTL_INIT(cpu_thread_mask, cpu_smt_flags, SMT),
+ SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
+ SDTL_INIT(cpu_book_mask, NULL, BOOK),
+ SDTL_INIT(cpu_drawer_mask, NULL, DRAWER),
+ SDTL_INIT(cpu_cpu_mask, NULL, PKG),
{ NULL, },
};
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index fc78c23..e0adf75 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -485,35 +485,26 @@ static void __init build_sched_topology(void)
int i = 0;
#ifdef CONFIG_SCHED_SMT
- x86_topology[i++] = (struct sched_domain_topology_level){
- cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT)
- };
+ x86_topology[i++] = SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT);
#endif
#ifdef CONFIG_SCHED_CLUSTER
- x86_topology[i++] = (struct sched_domain_topology_level){
- cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS)
- };
+ x86_topology[i++] = SDTL_INIT(cpu_clustergroup_mask, x86_cluster_flags, CLS);
#endif
#ifdef CONFIG_SCHED_MC
- x86_topology[i++] = (struct sched_domain_topology_level){
- cpu_coregroup_mask, x86_core_flags, SD_INIT_NAME(MC)
- };
+ x86_topology[i++] = SDTL_INIT(cpu_coregroup_mask, x86_core_flags, MC);
#endif
/*
* When there is NUMA topology inside the package skip the PKG domain
* since the NUMA domains will auto-magically create the right spanning
* domains based on the SLIT.
*/
- if (!x86_has_numa_in_package) {
- x86_topology[i++] = (struct sched_domain_topology_level){
- cpu_cpu_mask, x86_sched_itmt_flags, SD_INIT_NAME(PKG)
- };
- }
+ if (!x86_has_numa_in_package)
+ x86_topology[i++] = SDTL_INIT(cpu_cpu_mask, x86_sched_itmt_flags, PKG);
/*
* There must be one trailing NULL entry left.
*/
- BUG_ON(i >= ARRAY_SIZE(x86_topology)-1);
+ BUG_ON(i >= ARRAY_SIZE(x86_topology) - 1);
set_sched_topology(x86_topology);
}
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index e54e7fa..0d5daaa 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -196,8 +196,8 @@ struct sched_domain_topology_level {
extern void __init set_sched_topology(struct sched_domain_topology_level *tl);
extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio);
-
-# define SD_INIT_NAME(type) .name = #type
+#define SDTL_INIT(maskfn, flagsfn, dname) ((struct sched_domain_topology_level) \
+ { .mask = maskfn, .sd_flags = flagsfn, .name = #dname })
#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
extern void rebuild_sched_domains_energy(void);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8e06b1d..d01f5a4 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1737,17 +1737,17 @@ sd_init(struct sched_domain_topology_level *tl,
*/
static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_SMT
- { cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
+ SDTL_INIT(cpu_smt_mask, cpu_smt_flags, SMT),
#endif
#ifdef CONFIG_SCHED_CLUSTER
- { cpu_clustergroup_mask, cpu_cluster_flags, SD_INIT_NAME(CLS) },
+ SDTL_INIT(cpu_clustergroup_mask, cpu_cluster_flags, CLS),
#endif
#ifdef CONFIG_SCHED_MC
- { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
+ SDTL_INIT(cpu_coregroup_mask, cpu_core_flags, MC),
#endif
- { cpu_cpu_mask, SD_INIT_NAME(PKG) },
+ SDTL_INIT(cpu_cpu_mask, NULL, PKG),
{ NULL, },
};
@@ -2008,23 +2008,15 @@ void sched_init_numa(int offline_node)
/*
* Add the NUMA identity distance, aka single NODE.
*/
- tl[i++] = (struct sched_domain_topology_level){
- .mask = sd_numa_mask,
- .numa_level = 0,
- SD_INIT_NAME(NODE)
- };
+ tl[i++] = SDTL_INIT(sd_numa_mask, NULL, NODE);
/*
* .. and append 'j' levels of NUMA goodness.
*/
for (j = 1; j < nr_levels; i++, j++) {
- tl[i] = (struct sched_domain_topology_level){
- .mask = sd_numa_mask,
- .sd_flags = cpu_numa_flags,
- .flags = SDTL_OVERLAP,
- .numa_level = j,
- SD_INIT_NAME(NUMA)
- };
+ tl[i] = SDTL_INIT(sd_numa_mask, cpu_numa_flags, NUMA);
+ tl[i].numa_level = j;
+ tl[i].flags = SDTL_OVERLAP;
}
sched_domain_topology_saved = sched_domain_topology;