Only x86 and Power7 set SD_ASYM_PACKING at boot-time depending on the
system. All other platforms don't make use of asym-packing anyway,
so introduce auxiliary ARCH_HAS_SCHED_ASYM_PACKING and guard all the
related logic behind that so it isn't compiled when not needed.

On arm64 this reduces
size kernel/sched/fair.o
   text    data     bss     dec     hex filename
  74120    4097      88   78305   131e1 kernel/sched/fair.o
to
size kernel/sched/fair.o
   text    data     bss     dec     hex filename
  72896    4065      88   77049   12cf9 kernel/sched/fair.o

Most of that is on the load-balance hot-path, in particular
need_active_balance() reduces from 141 to 84 instructions.

hackbench -pTl 20000 on a rk3399 goes from
58.4664 to 57.6056 (-1.5%), mean over 20 iterations.
Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
arch/Kconfig | 3 ++
arch/powerpc/Kconfig | 1 +
arch/x86/Kconfig | 1 +
kernel/sched/fair.c | 86 ++++++++++++++++++++++++++++++--------------
kernel/sched/sched.h | 8 ++++-
5 files changed, 71 insertions(+), 28 deletions(-)
diff --git a/arch/Kconfig b/arch/Kconfig
index 8af374ea1adc..9f6ec10723a3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -1684,4 +1684,7 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT
config ARCH_NEED_CMPXCHG_1_EMU
bool
+config ARCH_HAS_SCHED_ASYM_PACKING
+ bool
+
endmenu
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8094a01974cc..5de478f9087e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -148,6 +148,7 @@ config PPC
select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
+ select ARCH_HAS_SCHED_ASYM_PACKING
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_STRICT_KERNEL_RWX if (PPC_BOOK3S || PPC_8xx) && !HIBERNATION
select ARCH_HAS_STRICT_KERNEL_RWX if PPC_85xx && !HIBERNATION && !RANDOMIZE_BASE
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 2852fcd82cbd..6d7db89d141c 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -99,6 +99,7 @@ config X86
select ARCH_HAS_NONLEAF_PMD_YOUNG if PGTABLE_LEVELS > 2
select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64
select ARCH_HAS_COPY_MC if X86_64
+ select ARCH_HAS_SCHED_ASYM_PACKING
select ARCH_HAS_SET_MEMORY
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 225b31aaee55..5a2108b16340 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -86,13 +86,6 @@ static int __init setup_sched_thermal_decay_shift(char *str)
__setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift);
#ifdef CONFIG_SMP
-/*
- * For asym packing, by default the lower numbered CPU has higher priority.
- */
-int __weak arch_asym_cpu_priority(int cpu)
-{
- return -cpu;
-}
/*
* The margin used when comparing utilization with CPU capacity.
@@ -9186,12 +9179,14 @@ enum group_type {
* a task on SMT with busy sibling to another CPU on idle core.
*/
group_smt_balance,
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
/*
* SD_ASYM_PACKING only: One local CPU with higher capacity is available,
* and the task should be migrated to it instead of running on the
* current CPU.
*/
group_asym_packing,
+#endif
/*
* The tasks' affinity constraints previously prevented the scheduler
* from balancing the load across the system.
@@ -9876,7 +9871,9 @@ struct sg_lb_stats {
unsigned int idle_cpus; /* Nr of idle CPUs in the group */
unsigned int group_weight;
enum group_type group_type;
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
+#endif
unsigned int group_smt_balance; /* Task on busy SMT be moved */
unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
#ifdef CONFIG_NUMA_BALANCING
@@ -10136,8 +10133,10 @@ group_type group_classify(unsigned int imbalance_pct,
if (sg_imbalanced(group))
return group_imbalanced;
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
if (sgs->group_asym_packing)
return group_asym_packing;
+#endif
if (sgs->group_smt_balance)
return group_smt_balance;
@@ -10151,6 +10150,15 @@ group_type group_classify(unsigned int imbalance_pct,
return group_has_spare;
}
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
+/*
+ * For asym packing, by default the lower numbered CPU has higher priority.
+ */
+int __weak arch_asym_cpu_priority(int cpu)
+{
+ return -cpu;
+}
+
/**
* sched_use_asym_prio - Check whether asym_packing priority must be used
* @sd: The scheduling domain of the load balancing
@@ -10209,6 +10217,40 @@ sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs, struct sched_group
return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu);
}
+static inline bool asym_active_balance(struct lb_env *env)
+{
+ /*
+ * ASYM_PACKING needs to force migrate tasks from busy but lower
+ * priority CPUs in order to pack all tasks in the highest priority
+ * CPUs. When done between cores, do it only if the whole core if the
+ * whole core is idle.
+ *
+ * If @env::src_cpu is an SMT core with busy siblings, let
+ * the lower priority @env::dst_cpu help it. Do not follow
+ * CPU priority.
+ */
+ return env->idle && sched_use_asym_prio(env->sd, env->dst_cpu) &&
+ (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
+ !sched_use_asym_prio(env->sd, env->src_cpu));
+}
+
+static inline bool check_asym_packing(struct sg_lb_stats *group)
+{
+ return group->group_type == group_asym_packing;
+}
+
+#else
+
+static inline bool sched_asym(struct sched_domain *sd, int dst_cpu, int src_cpu)
+{
+ return false;
+}
+
+static inline bool asym_active_balance(struct lb_env *env) { return false; }
+static inline bool check_asym_packing(struct sg_lb_stats *group) { return false; }
+
+#endif /* CONFIG_ARCH_HAS_SCHED_ASYM_PACKING */
+
/* One group has more than one SMT CPU while the other group does not */
static inline bool smt_vs_nonsmt_groups(struct sched_group *sg1,
struct sched_group *sg2)
@@ -10360,10 +10402,12 @@ static inline void update_sg_lb_stats(struct lb_env *env,
sgs->group_weight = group->group_weight;
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
/* Check if dst CPU is idle and preferred to this group */
if (!local_group && env->idle && sgs->sum_h_nr_running &&
sched_group_asym(env, sgs, group))
sgs->group_asym_packing = 1;
+#endif
/* Check for loaded SMT group to be balanced to dst CPU */
if (!local_group && smt_balance(env, sgs, group))
@@ -10436,9 +10480,11 @@ static bool update_sd_pick_busiest(struct lb_env *env,
*/
return false;
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
case group_asym_packing:
/* Prefer to move from lowest priority CPU's work */
return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu);
+#endif
case group_misfit_task:
/*
@@ -10691,7 +10737,9 @@ static bool update_pick_idlest(struct sched_group *idlest,
break;
case group_imbalanced:
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
case group_asym_packing:
+#endif
case group_smt_balance:
/* Those types are not used in the slow wakeup path */
return false;
@@ -10823,7 +10871,9 @@ sched_balance_find_dst_group(struct sched_domain *sd, struct task_struct *p, int
break;
case group_imbalanced:
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
case group_asym_packing:
+#endif
case group_smt_balance:
/* Those type are not used in the slow wakeup path */
return NULL;
@@ -11058,7 +11108,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
return;
}
- if (busiest->group_type == group_asym_packing) {
+ if (check_asym_packing(busiest)) {
/*
* In case of asym capacity, we will try to migrate all load to
* the preferred CPU.
@@ -11265,7 +11315,7 @@ static struct sched_group *sched_balance_find_src_group(struct lb_env *env)
goto out_balanced;
/* ASYM feature bypasses nice load balance check */
- if (busiest->group_type == group_asym_packing)
+ if (check_asym_packing(busiest))
goto force_balance;
/*
@@ -11520,24 +11570,6 @@ static struct rq *sched_balance_find_src_rq(struct lb_env *env,
*/
#define MAX_PINNED_INTERVAL 512
-static inline bool
-asym_active_balance(struct lb_env *env)
-{
- /*
- * ASYM_PACKING needs to force migrate tasks from busy but lower
- * priority CPUs in order to pack all tasks in the highest priority
- * CPUs. When done between cores, do it only if the whole core if the
- * whole core is idle.
- *
- * If @env::src_cpu is an SMT core with busy siblings, let
- * the lower priority @env::dst_cpu help it. Do not follow
- * CPU priority.
- */
- return env->idle && sched_use_asym_prio(env->sd, env->dst_cpu) &&
- (sched_asym_prefer(env->dst_cpu, env->src_cpu) ||
- !sched_use_asym_prio(env->sd, env->src_cpu));
-}
-
static inline bool
imbalanced_active_balance(struct lb_env *env)
{
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b1c3588a8f00..51d49700f643 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -937,11 +937,17 @@ static inline long se_weight(struct sched_entity *se)
return scale_load_down(se->load.weight);
}
-
+#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
static inline bool sched_asym_prefer(int a, int b)
{
return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
}
+#else
+static inline bool sched_asym_prefer(int a, int b)
+{
+ return false;
+}
+#endif
struct perf_domain {
struct em_perf_domain *em_pd;
--
2.34.1
On Thu, Oct 17, 2024 at 10:46:49PM +0100, Christian Loehle wrote:
> Only x86 and Power7 set SD_ASYM_PACKING at boot-time depending on the
> system. All other platforms don't make use of asym-packing anyway,
> so introduce auxiliary ARCH_HAS_SCHED_ASYM_PACKING and guard all the
> related logic behind that so it isn't compiled when not needed.
>
> On arm64 this reduces
> size kernel/sched/fair.o
>    text    data     bss     dec     hex filename
>   74120    4097      88   78305   131e1 kernel/sched/fair.o
> to
> size kernel/sched/fair.o
>    text    data     bss     dec     hex filename
>   72896    4065      88   77049   12cf9 kernel/sched/fair.o
>
> Most of that is on the load-balance hot-path, in particular
> need_active_balance() reduces from 141 to 84 instructions.
>
> hackbench -pTl 20000 on a rk3399 goes from
> 58.4664 to 57.6056 (-1.5%), mean over 20 iterations.

*sigh* more ifdef is the very last thing we need :/

What's the error on that measurement? Is it statistically relevant etc.

> @@ -9186,12 +9179,14 @@ enum group_type {
> * a task on SMT with busy sibling to another CPU on idle core.
> */
> group_smt_balance,
> +#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
> /*
> * SD_ASYM_PACKING only: One local CPU with higher capacity is available,
> * and the task should be migrated to it instead of running on the
> * current CPU.
> */
> group_asym_packing,
> +#endif

Do we really need to remove the value from the enum !?

> /*
> * The tasks' affinity constraints previously prevented the scheduler
> * from balancing the load across the system.
> @@ -9876,7 +9871,9 @@ struct sg_lb_stats {
> unsigned int idle_cpus; /* Nr of idle CPUs in the group */
> unsigned int group_weight;
> enum group_type group_type;
> +#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
> unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
> +#endif

just leave it be, who cares if it goes unused?

> unsigned int group_smt_balance; /* Task on busy SMT be moved */
> unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
> #ifdef CONFIG_NUMA_BALANCING
> @@ -10136,8 +10133,10 @@ group_type group_classify(unsigned int imbalance_pct,
> if (sg_imbalanced(group))
> return group_imbalanced;
>
> +#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
> if (sgs->group_asym_packing)
> return group_asym_packing;
> +#endif

Add a helper that returns false such that the compiler can DCE it?

> @@ -10360,10 +10402,12 @@ static inline void update_sg_lb_stats(struct lb_env *env,
>
> sgs->group_weight = group->group_weight;
>
> +#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
> /* Check if dst CPU is idle and preferred to this group */
> if (!local_group && env->idle && sgs->sum_h_nr_running &&
> sched_group_asym(env, sgs, group))
> sgs->group_asym_packing = 1;
> +#endif

Just make sure sched_group_asym() is unconditionally false and the compiler
will DCE it, no?

>
> /* Check for loaded SMT group to be balanced to dst CPU */
> if (!local_group && smt_balance(env, sgs, group))
> @@ -10436,9 +10480,11 @@ static bool update_sd_pick_busiest(struct lb_env *env,
> */
> return false;
>
> +#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
> case group_asym_packing:
> /* Prefer to move from lowest priority CPU's work */
> return sched_asym_prefer(sds->busiest->asym_prefer_cpu, sg->asym_prefer_cpu);
> +#endif

Just leave it be, it'll never get selected.

> case group_misfit_task:
> /*
> @@ -10691,7 +10737,9 @@ static bool update_pick_idlest(struct sched_group *idlest,
> break;
>
> case group_imbalanced:
> +#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
> case group_asym_packing:
> +#endif

idem

> case group_smt_balance:
> /* Those types are not used in the slow wakeup path */
> return false;
> @@ -10823,7 +10871,9 @@ sched_balance_find_dst_group(struct sched_domain *sd, struct task_struct *p, int
> break;
>
> case group_imbalanced:
> +#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
> case group_asym_packing:
> +#endif

and again.

> case group_smt_balance:
> /* Those type are not used in the slow wakeup path */
> return NULL;
> @@ -11058,7 +11108,7 @@ static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *s
> return;
> }
>
> - if (busiest->group_type == group_asym_packing) {
> + if (check_asym_packing(busiest)) {
> /*
> * In case of asym capacity, we will try to migrate all load to
> * the preferred CPU.
> @@ -11265,7 +11315,7 @@ static struct sched_group *sched_balance_find_src_group(struct lb_env *env)
> goto out_balanced;
>
> /* ASYM feature bypasses nice load balance check */
> - if (busiest->group_type == group_asym_packing)
> + if (check_asym_packing(busiest))
> goto force_balance;
>
> /*
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index b1c3588a8f00..51d49700f643 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -937,11 +937,17 @@ static inline long se_weight(struct sched_entity *se)
> return scale_load_down(se->load.weight);
> }
>
> -
> +#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
> static inline bool sched_asym_prefer(int a, int b)
> {
> return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
> }
> +#else
> +static inline bool sched_asym_prefer(int a, int b)
> +{
> + return false;
> +}
> +#endif

Or you can write:

static inline bool sched_asym_prefer(int a, int b)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_SCHED_ASYM_PACKING))
		return false;

	return arch_asym_cpu_priority(a) > arch_asym_cpu_priority(b);
}

Anyway, ifdef bad, less is more. DCE good.
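
(A minimal sketch of the DCE-friendly shape Peter describes above, assuming the
check_asym_packing()/sched_group_asym() helper names from the posted patch and
group_asym_packing left in the enum; the IS_ENABLED() test folds to a
compile-time constant, so the compiler can drop the asym-packing branches at
the call sites without any #ifdef there.)

static inline bool check_asym_packing(struct sg_lb_stats *sgs)
{
	/* Constant false when the arch can never set SD_ASYM_PACKING. */
	if (!IS_ENABLED(CONFIG_ARCH_HAS_SCHED_ASYM_PACKING))
		return false;

	return sgs->group_type == group_asym_packing;
}

static inline bool
sched_group_asym(struct lb_env *env, struct sg_lb_stats *sgs,
		 struct sched_group *group)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_SCHED_ASYM_PACKING))
		return false;

	/* ... the existing SMT/priority checks remain unchanged ... */
	return sched_asym(env->sd, env->dst_cpu, group->asym_prefer_cpu);
}

With the helpers folding to constant false, group_asym_packing can stay in the
enum and the group_asym_packing field can stay in struct sg_lb_stats, matching
the "just leave it be" comments above.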
On 10/18/24 03:16, Christian Loehle wrote:
> Only x86 and Power7 set SD_ASYM_PACKING at boot-time depending on the

Power10 on Shared Processor LPAR also enables that.

> system. All other platforms don't make use of asym-packing anyway,
> so introduce auxiliary ARCH_HAS_SCHED_ASYM_PACKING and guard all the
> related logic behind that so it isn't compiled when not needed.
>
> On arm64 this reduces
> size kernel/sched/fair.o
>    text    data     bss     dec     hex filename
>   74120    4097      88   78305   131e1 kernel/sched/fair.o
> to
> size kernel/sched/fair.o
>    text    data     bss     dec     hex filename
>   72896    4065      88   77049   12cf9 kernel/sched/fair.o
>
> Most of that is on the load-balance hot-path, in particular
> need_active_balance() reduces from 141 to 84 instructions.

This patch sprinkles #ifdef across the code. IMHO it makes the code
difficult to read.

It's possible to do such slight reductions for other group types as well,
such as group_misfit_task (which allows right load balancing when
SD_ASYM_CPUCAPACITY is on, except for a case when capacity is reduced due
to side activity).

sd->flags should be cache hot and it's a percpu variable. Hence the access
to it shouldn't be costly. The branch predictors should do a good job since
it doesn't change often. Correct me if I am wrong.

>
> hackbench -pTl 20000 on a rk3399 goes from
> 58.4664 to 57.6056 (-1.5%), mean over 20 iterations.
>
> Signed-off-by: Christian Loehle <christian.loehle@arm.com>
> ---
> arch/Kconfig | 3 ++
> arch/powerpc/Kconfig | 1 +
> arch/x86/Kconfig | 1 +
> kernel/sched/fair.c | 86 ++++++++++++++++++++++++++++++--------------
> kernel/sched/sched.h | 8 ++++-
> 5 files changed, 71 insertions(+), 28 deletions(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 8af374ea1adc..9f6ec10723a3 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -1684,4 +1684,7 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT
> config ARCH_NEED_CMPXCHG_1_EMU
> bool

...
Shrikanth Hegde <sshegde@linux.ibm.com> writes:
> On 10/18/24 03:16, Christian Loehle wrote:
>> Only x86 and Power7 set SD_ASYM_PACKING at boot-time depending on the
>
> Power10 on Shared Processor LPAR also enables that.
>
>> system. All other platforms don't make use of asym-packing anyway,
>> so introduce auxiliary ARCH_HAS_SCHED_ASYM_PACKING and guard all the
>> related logic behind that so it isn't compiled when not needed.
>>
>> On arm64 this reduces
>> size kernel/sched/fair.o
>>    text    data     bss     dec     hex filename
>>   74120    4097      88   78305   131e1 kernel/sched/fair.o
>> to
>> size kernel/sched/fair.o
>>    text    data     bss     dec     hex filename
>>   72896    4065      88   77049   12cf9 kernel/sched/fair.o
>>
>> Most of that is on the load-balance hot-path, in particular
>> need_active_balance() reduces from 141 to 84 instructions.
>
> This patch sprinkles #ifdef across the code. IMHO it makes the code
> difficult to read.

Yes, strong agree on that.

It should be possible to clean that up for the most part with some static
inline helpers with stubs for the disabled case, in the usual kernel style.

Also Christian please Cc linuxppc-dev@lists.ozlabs.org on v2.

cheers
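
(For the stub style mentioned here, a rough sketch in the usual kernel form,
again borrowing the check_asym_packing() name from the posted patch rather
than from any actual v2: one #ifdef/#else pair per helper, no conditional
compilation at the call sites.)

#ifdef CONFIG_ARCH_HAS_SCHED_ASYM_PACKING
static inline bool check_asym_packing(struct sg_lb_stats *sgs)
{
	return sgs->group_type == group_asym_packing;
}
#else
/* Stub for the disabled case; callers compile away as dead code. */
static inline bool check_asym_packing(struct sg_lb_stats *sgs)
{
	return false;
}
#endif /* CONFIG_ARCH_HAS_SCHED_ASYM_PACKING */

Either this or the IS_ENABLED() variant keeps callers such as
calculate_imbalance() and sched_balance_find_src_group() as a plain
"if (check_asym_packing(busiest))", with the dead branch removed when the
option is off.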