To handle minor capacity differences (<5%), use asym-packing to steer
tasks towards higher-performance CPUs, replacing capacity-aware
scheduling in those cases and skipping the SD_ASYM_CPUCAPACITY flag.
This is implemented by using highest_perf values as priorities to steer
towards.
highest_perf-based asym-packing is a global ordering that is applied
at all levels of the hierarchy for now.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
arch/arm64/include/asm/topology.h | 6 ++++++
arch/arm64/kernel/topology.c | 34 +++++++++++++++++++++++++++++++
kernel/sched/topology.c | 26 ++++++++++++++++-------
3 files changed, 59 insertions(+), 7 deletions(-)
diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h
index b9eaf4ad7085..e0b039e1a5bb 100644
--- a/arch/arm64/include/asm/topology.h
+++ b/arch/arm64/include/asm/topology.h
@@ -39,6 +39,12 @@ void update_freq_counters_refs(void);
#undef arch_cpu_is_threaded
#define arch_cpu_is_threaded() (read_cpuid_mpidr() & MPIDR_MT_BITMASK)
+#undef arch_asym_cpu_priority
+#define arch_asym_cpu_priority arm64_arch_asym_cpu_priority
+#define arch_sched_asym_flags arm64_arch_sched_asym_flags
+extern int arm64_arch_asym_cpu_priority(int cpu);
+extern int arm64_arch_sched_asym_flags(void);
+
#include <asm-generic/topology.h>
#endif /* _ASM_ARM_TOPOLOGY_H */
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index b32f13358fbb..4e3582d44a26 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -19,6 +19,7 @@
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/sched/isolation.h>
+#include <linux/sched/topology.h>
#include <linux/xarray.h>
#include <asm/cpu.h>
@@ -373,6 +374,26 @@ core_initcall(init_amu_fie);
#ifdef CONFIG_ACPI_CPPC_LIB
#include <acpi/cppc_acpi.h>
+static bool __read_mostly sched_cppc_asym_active;
+DEFINE_PER_CPU_READ_MOSTLY(int, sched_cppc_priority);
+
+int arm64_arch_asym_cpu_priority(int cpu)
+{
+ if (!READ_ONCE(sched_cppc_asym_active))
+ return -cpu;
+ return per_cpu(sched_cppc_priority, cpu);
+}
+
+int arm64_arch_sched_asym_flags(void)
+{
+ return READ_ONCE(sched_cppc_asym_active) ? SD_ASYM_PACKING : 0;
+}
+
+void arch_topology_init_cppc_asym(void)
+{
+ WRITE_ONCE(sched_cppc_asym_active, topology_init_cppc_asym_packing(&sched_cppc_priority));
+}
+
static void cpu_read_corecnt(void *val)
{
/*
@@ -473,4 +494,17 @@ int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val)
{
return -EOPNOTSUPP;
}
+
+#else
+int arm64_arch_asym_cpu_priority(int cpu)
+{
+ return -cpu;
+}
+
+int arm64_arch_sched_asym_flags(void)
+{
+ return 0;
+}
+
+void arch_topology_init_cppc_asym(void) { }
#endif /* CONFIG_ACPI_CPPC_LIB */
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index b0c590dfdb01..758b8796b62d 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1396,6 +1396,8 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
const struct cpumask *cpu_map)
{
struct asym_cap_data *entry;
+ unsigned long max_cap = 0, min_cap = ULONG_MAX;
+ bool has_cap = false;
int count = 0, miss = 0;
/*
@@ -1405,9 +1407,12 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
* skip those.
*/
list_for_each_entry(entry, &asym_cap_list, link) {
- if (cpumask_intersects(sd_span, cpu_capacity_span(entry)))
+ if (cpumask_intersects(sd_span, cpu_capacity_span(entry))) {
++count;
- else if (cpumask_intersects(cpu_map, cpu_capacity_span(entry)))
+ max_cap = max(max_cap, entry->capacity);
+ min_cap = min(min_cap, entry->capacity);
+ has_cap = true;
+ } else if (cpumask_intersects(cpu_map, cpu_capacity_span(entry)))
++miss;
}
@@ -1419,10 +1424,12 @@ asym_cpu_capacity_classify(const struct cpumask *sd_span,
/* Some of the available CPU capacity values have not been detected */
if (miss)
return SD_ASYM_CPUCAPACITY;
+ /* When asym packing is active, ignore small capacity differences. */
+ if (arch_sched_asym_flags() && has_cap && !capacity_greater(max_cap, min_cap))
+ return 0;
/* Full asymmetry */
return SD_ASYM_CPUCAPACITY | SD_ASYM_CPUCAPACITY_FULL;
-
}
static void free_asym_cap_entry(struct rcu_head *head)
@@ -1753,7 +1760,7 @@ static inline int topology_arch_sched_asym_flags(void)
#ifdef CONFIG_SCHED_SMT
int cpu_smt_flags(void)
{
- return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
+ return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC | arch_sched_asym_flags();
}
const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
@@ -1765,7 +1772,7 @@ const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cp
#ifdef CONFIG_SCHED_CLUSTER
int cpu_cluster_flags(void)
{
- return SD_CLUSTER | SD_SHARE_LLC;
+ return SD_CLUSTER | SD_SHARE_LLC | arch_sched_asym_flags();
}
const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
@@ -1777,7 +1784,7 @@ const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cp
#ifdef CONFIG_SCHED_MC
int cpu_core_flags(void)
{
- return SD_SHARE_LLC;
+ return SD_SHARE_LLC | arch_sched_asym_flags();
}
const struct cpumask *tl_mc_mask(struct sched_domain_topology_level *tl, int cpu)
@@ -1791,6 +1798,11 @@ const struct cpumask *tl_pkg_mask(struct sched_domain_topology_level *tl, int cp
return cpu_node_mask(cpu);
}
+static int cpu_pkg_flags(void)
+{
+ return arch_sched_asym_flags();
+}
+
/*
* Topology list, bottom-up.
*/
@@ -1806,7 +1818,7 @@ static struct sched_domain_topology_level default_topology[] = {
#ifdef CONFIG_SCHED_MC
SDTL_INIT(tl_mc_mask, cpu_core_flags, MC),
#endif
- SDTL_INIT(tl_pkg_mask, NULL, PKG),
+ SDTL_INIT(tl_pkg_mask, cpu_pkg_flags, PKG),
{ NULL, },
};
--
2.34.1
On 25/03/26 18:13, Christian Loehle wrote:
> @@ -1753,7 +1760,7 @@ static inline int topology_arch_sched_asym_flags(void)
> #ifdef CONFIG_SCHED_SMT
> int cpu_smt_flags(void)
> {
> - return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
> + return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC | arch_sched_asym_flags();
> }
>
> const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
> @@ -1765,7 +1772,7 @@ const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cp
> #ifdef CONFIG_SCHED_CLUSTER
> int cpu_cluster_flags(void)
> {
> - return SD_CLUSTER | SD_SHARE_LLC;
> + return SD_CLUSTER | SD_SHARE_LLC | arch_sched_asym_flags();
> }
>
> const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
> @@ -1777,7 +1784,7 @@ const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cp
> #ifdef CONFIG_SCHED_MC
> int cpu_core_flags(void)
> {
> - return SD_SHARE_LLC;
> + return SD_SHARE_LLC | arch_sched_asym_flags();
> }
>
So while the binning problem applies to more than one architecture, I'm not
sure we want this to be generally applied to all topology levels. This is
/technically/ not a problem since even if a topology level has
SD_ASYM_PACKING, all CPUs at that level can have the same priority, but
in that case it's a bit wasteful.
I don't have any better ideas ATM to keep this arch-specific via
set_sched_topology(), like how x86 and powerpc handle asym packing.
On 3/27/26 15:44, Valentin Schneider wrote:
> On 25/03/26 18:13, Christian Loehle wrote:
>> @@ -1753,7 +1760,7 @@ static inline int topology_arch_sched_asym_flags(void)
>> #ifdef CONFIG_SCHED_SMT
>> int cpu_smt_flags(void)
>> {
>> - return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC;
>> + return SD_SHARE_CPUCAPACITY | SD_SHARE_LLC | arch_sched_asym_flags();
>> }
>>
>> const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cpu)
>> @@ -1765,7 +1772,7 @@ const struct cpumask *tl_smt_mask(struct sched_domain_topology_level *tl, int cp
>> #ifdef CONFIG_SCHED_CLUSTER
>> int cpu_cluster_flags(void)
>> {
>> - return SD_CLUSTER | SD_SHARE_LLC;
>> + return SD_CLUSTER | SD_SHARE_LLC | arch_sched_asym_flags();
>> }
>>
>> const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cpu)
>> @@ -1777,7 +1784,7 @@ const struct cpumask *tl_cls_mask(struct sched_domain_topology_level *tl, int cp
>> #ifdef CONFIG_SCHED_MC
>> int cpu_core_flags(void)
>> {
>> - return SD_SHARE_LLC;
>> + return SD_SHARE_LLC | arch_sched_asym_flags();
>> }
>>
>
> So while the binning problem applies to more than one architecture, I'm not
> sure we want this to be generally applied to all topology levels. This is
> /technically/ not a problem since even if a topology level has
> SD_ASYM_PACKING, all CPUs at that level can have the same priority, but
> in that case it's a bit wasteful.
>
> I don't have any better ideas ATM to keep this arch-specific via
> set_sched_topology(), like how x86 and powerpc handle asym packing.
>
Right, I think it doesn't make sense at SMT level, but the rest seems
sensible IMO?
It is noted for v2, although unless Andrea sees vastly different results
for Grace than for Vera, appetite for this is rather low anyway.
Hi Christian, kernel test robot noticed the following build errors: [auto build test ERROR on tip/sched/core] [also build test ERROR on arm64/for-next/core driver-core/driver-core-testing driver-core/driver-core-next driver-core/driver-core-linus peterz-queue/sched/core linus/master v7.0-rc5 next-20260325] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Christian-Loehle/sched-topology-Introduce-arch-hooks-for-asympacking/20260326-145644 base: tip/sched/core patch link: https://lore.kernel.org/r/20260325181314.3875909-4-christian.loehle%40arm.com patch subject: [PATCH 3/3] arm64/sched: Enable CPPC-based asympacking config: openrisc-allnoconfig (https://download.01.org/0day-ci/archive/20260326/202603262311.bSaX71dF-lkp@intel.com/config) compiler: or1k-linux-gcc (GCC) 15.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260326/202603262311.bSaX71dF-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202603262311.bSaX71dF-lkp@intel.com/ All errors (new ones prefixed by >>): or1k-linux-ld: kernel/sched/build_utility.o: in function `sd_init.constprop.0': build_utility.c:(.text+0x1840): undefined reference to `arch_sched_asym_flags' >> build_utility.c:(.text+0x1840): relocation truncated to fit: R_OR1K_INSN_REL_26 against undefined symbol `arch_sched_asym_flags' -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi Christian, kernel test robot noticed the following build errors: [auto build test ERROR on tip/sched/core] [also build test ERROR on arm64/for-next/core driver-core/driver-core-testing driver-core/driver-core-next driver-core/driver-core-linus peterz-queue/sched/core linus/master v7.0-rc5 next-20260325] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Christian-Loehle/sched-topology-Introduce-arch-hooks-for-asympacking/20260326-145644 base: tip/sched/core patch link: https://lore.kernel.org/r/20260325181314.3875909-4-christian.loehle%40arm.com patch subject: [PATCH 3/3] arm64/sched: Enable CPPC-based asympacking config: parisc-allnoconfig (https://download.01.org/0day-ci/archive/20260326/202603262307.63Wed8OI-lkp@intel.com/config) compiler: hppa-linux-gcc (GCC) 15.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20260326/202603262307.63Wed8OI-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202603262307.63Wed8OI-lkp@intel.com/ All errors (new ones prefixed by >>): hppa-linux-ld: kernel/sched/build_utility.o: in function `sd_init.constprop.0': >> (.text+0x17b8): undefined reference to `arch_sched_asym_flags' -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
© 2016 - 2026 Red Hat, Inc.