The function "can_migrate_task()" utilize "for_each_cpu_and" with a
"if" statement inside to find the destination cpu. It's the same logic
to find the first set bit of the result of the bitwise-AND of
"env->dst_grpmask", "env->cpus" and "p->cpus_ptr".
Refactor it by directly perform bitwise-AND for "env->dst_grpmask",
"env->cpus" and "p->cpus_ptr" and use "cpumask_first()" to select the
destination cpu, so we can elimate the need of looping and multiple
times of branch.
After the refactoring this part of the code can speed up from ~130ns to
~93ns, according to the test below.
Ran the test for 5 times and the result is showned in the following
table, and the test script is paste in next section.
---------------------------------------------------------
| Old method | 123 | 135 | 126 | 129 | 135 | avg ~130ns |
---------------------------------------------------------
| New method |  87 |  97 |  90 |  92 |  98 | avg  ~93ns |
---------------------------------------------------------
Signed-off-by: I Hsin Cheng <richard120310@gmail.com>
---
The test was done on Linux 6.8.0-48-generic x86_64 with an
Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz.
The test was executed in the form of a kernel module.

Test script:
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/ktime.h>
#include <linux/cpumask.h>
#include <linux/sched.h>

int init_module(void)
{
	struct cpumask cur_mask, custom_mask, result_mask;
	struct task_struct *p = current;
	int cpu, cpu1 = nr_cpu_ids, cpu2 = nr_cpu_ids;
	unsigned int tmp = 0;

	cpumask_copy(&cur_mask, cpu_online_mask);
	/* Self-implemented helper, omitted here for brevity */
	generate_random_cpumask(&custom_mask);

	/* Old method: iterate the AND of two masks and test the third. */
	ktime_t start_1 = ktime_get();
	for_each_cpu_and(cpu, &cur_mask, &custom_mask) {
		if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
			/* imitate load balance operation */
			tmp |= 0x01010101;
			cpu1 = cpu;
			break;
		}
	}
	ktime_t end_1 = ktime_get();

	/* New method: AND all three masks, then take the first set bit. */
	ktime_t start_2 = ktime_get();
	cpumask_and(&result_mask, &cur_mask, &custom_mask);
	cpumask_and(&result_mask, &result_mask, p->cpus_ptr);
	cpu = cpumask_first(&result_mask);
	if (cpu < nr_cpu_ids) {
		/* imitate load balance operation */
		tmp |= 0x01010101;
		cpu2 = cpu;
	}
	ktime_t end_2 = ktime_get();

	/* Both methods must select the same cpu. */
	if (cpu1 != cpu2) {
		pr_err("Failed assertion, cpu1 = %d, cpu2 = %d\n", cpu1, cpu2);
		return 0;
	}

	pr_info("Old method spent time : %lld\n", ktime_to_ns(end_1 - start_1));
	pr_info("New method spent time : %lld\n", ktime_to_ns(end_2 - start_2));
	return 0;
}

MODULE_LICENSE("GPL");
---
kernel/sched/fair.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2d16c8545..ce46f61da 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
return 0;
/* Prevent to re-select dst_cpu via env's CPUs: */
- for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
- if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
- env->flags |= LBF_DST_PINNED;
- env->new_dst_cpu = cpu;
- break;
- }
+ struct cpumask dst_mask;
+
+ cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
+ cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
+
+ cpu = cpumask_first(&dst_mask);
+
+ if (cpu < nr_cpu_ids) {
+ env->flags |= LBF_DST_PINNED;
+ env->new_dst_cpu = cpu;
}
return 0;
--
2.43.0
On Thu, Jan 09, 2025 at 01:29:47AM +0800, I Hsin Cheng wrote:
> kernel/sched/fair.c | 16 ++++++++++------
> 1 file changed, 10 insertions(+), 6 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2d16c8545..ce46f61da 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> return 0;
>
> /* Prevent to re-select dst_cpu via env's CPUs: */
> - for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
> - if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
> - env->flags |= LBF_DST_PINNED;
> - env->new_dst_cpu = cpu;
> - break;
> - }
> + struct cpumask dst_mask;
Except you cannot put cpumask on-stack...
> +
> + cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
> + cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
> +
> + cpu = cpumask_first(&dst_mask);
> +
> + if (cpu < nr_cpu_ids) {
> + env->flags |= LBF_DST_PINNED;
> + env->new_dst_cpu = cpu;
> }
>
> return 0;
> --
> 2.43.0
>
On Thu, Jan 09, 2025 at 10:46:27AM +0100, Peter Zijlstra wrote:
> On Thu, Jan 09, 2025 at 01:29:47AM +0800, I Hsin Cheng wrote:
> > kernel/sched/fair.c | 16 ++++++++++------
> > 1 file changed, 10 insertions(+), 6 deletions(-)
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 2d16c8545..ce46f61da 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> > return 0;
> >
> > /* Prevent to re-select dst_cpu via env's CPUs: */
> > - for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
> > - if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
> > - env->flags |= LBF_DST_PINNED;
> > - env->new_dst_cpu = cpu;
> > - break;
> > - }
> > + struct cpumask dst_mask;
>
> Except you cannot put cpumask on-stack...
>
> > +
> > + cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
> > + cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
> > +
> > + cpu = cpumask_first(&dst_mask);
> > +
> > + if (cpu < nr_cpu_ids) {
> > + env->flags |= LBF_DST_PINNED;
> > + env->new_dst_cpu = cpu;
> > }
> >
> > return 0;
> > --
> > 2.43.0
> >
> > Except you cannot put cpumask on-stack...
Oh I'm sorry, may I ask the reason? Is it because cpumask tends to be
very large?

I assume we're not supposed to change the value of "env->dst_grpmask" or
"env->cpus" here, so to achieve something like this we need another
cpumask, and explicitly allocating a cpumask on the heap for this
section would be overkill, I think.

Or maybe the kernel could maintain a global cpumask, so anyone wishing
to perform operations like this can do so without having to allocate
and maintain a cpumask themselves?
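
[Editor's note: for context only, not something proposed in this thread.
The generic kernel pattern for a temporary cpumask that must not live on
the stack is cpumask_var_t with alloc_cpumask_var()/free_cpumask_var(),
which only allocates from the heap when CONFIG_CPUMASK_OFFSTACK=y. A
minimal sketch follows; the helper name "first_common_cpu()" is made up
for illustration, and as the question above suggests, this pattern would
be overkill for a hot path like can_migrate_task(), since the allocation
can fail and costs more than the loop it replaces.]

#include <linux/cpumask.h>
#include <linux/gfp.h>

/* Illustrative only: first cpu set in all three masks, using an
 * off-stack temporary cpumask instead of one on the stack. */
static unsigned int first_common_cpu(const struct cpumask *src1,
				     const struct cpumask *src2,
				     const struct cpumask *src3)
{
	cpumask_var_t tmp;
	unsigned int cpu;

	/* May fail when CONFIG_CPUMASK_OFFSTACK=y. */
	if (!alloc_cpumask_var(&tmp, GFP_KERNEL))
		return nr_cpu_ids;

	cpumask_and(tmp, src1, src2);
	cpumask_and(tmp, tmp, src3);
	cpu = cpumask_first(tmp);

	free_cpumask_var(tmp);
	return cpu;
}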
On Thu, Jan 09, 2025 at 06:12:22PM +0800, I Hsin Cheng wrote:
> > > Except you cannot put cpumask on-stack...
>
> Oh I'm sorry, may I ask the reason? Is it because cpumask tends to be
> very large?

Yes, when building with NR_CPUS=8192 (as distros tend to do) it is 1K of
stack space.
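
[Editor's note: a rough check of that figure, assuming struct cpumask is
simply a bitmap of NR_CPUS bits: 8192 bits / 8 = 1024 bytes, i.e. 1K of
stack per on-stack mask. The tiny snippet below is userspace C that just
prints the arithmetic; it is not kernel code.]

#include <stdio.h>

#define NR_CPUS	8192	/* typical distro config */

int main(void)
{
	/* struct cpumask is a bitmap of NR_CPUS bits -> NR_CPUS / 8 bytes */
	printf("NR_CPUS=%d -> %d bytes of stack per struct cpumask\n",
	       NR_CPUS, NR_CPUS / 8);
	return 0;
}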
On Thu, Jan 09, 2025 at 11:21:22AM +0100, Peter Zijlstra wrote:
> On Thu, Jan 09, 2025 at 06:12:22PM +0800, I Hsin Cheng wrote:
> > > > Except you cannot put cpumask on-stack...
> >
> > Oh I'm sorry, may I ask the reason? Is it because cpumask tends to be
> > very large?
>
> Yes, when building with NR_CPUS=8192 (as distros tend to do) it is 1K of
> stack space.

Thanks for your explanation! I've made the requested changes using
"cpumask_first_and_and()", so no additional cpumask is needed.
Tests are done and I'm sending a v2 RFC patch.

--
Richard
On Thu, Jan 09, 2025 at 01:29:47AM +0800, I Hsin Cheng wrote:
> @@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> return 0;
>
> /* Prevent to re-select dst_cpu via env's CPUs: */
> - for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
> - if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
> - env->flags |= LBF_DST_PINNED;
> - env->new_dst_cpu = cpu;
> - break;
> - }
> + struct cpumask dst_mask;
> +
> + cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
> + cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
> +
> + cpu = cpumask_first(&dst_mask);
Can cpumask_first_and_and() be used instead?
> +
> + if (cpu < nr_cpu_ids) {
> + env->flags |= LBF_DST_PINNED;
> + env->new_dst_cpu = cpu;
> }
>
> return 0;
> --
> 2.43.0
>
--Nysal
On Thu, Jan 09, 2025 at 02:28:27PM +0530, Nysal Jan K.A. wrote:
> On Thu, Jan 09, 2025 at 01:29:47AM +0800, I Hsin Cheng wrote:
> > @@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> > return 0;
> >
> > /* Prevent to re-select dst_cpu via env's CPUs: */
> > - for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
> > - if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
> > - env->flags |= LBF_DST_PINNED;
> > - env->new_dst_cpu = cpu;
> > - break;
> > - }
> > + struct cpumask dst_mask;
> > +
> > + cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
> > + cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
> > +
> > + cpu = cpumask_first(&dst_mask);
>
> Can cpumask_first_and_and() be used instead?
>
> > +
> > + if (cpu < nr_cpu_ids) {
> > + env->flags |= LBF_DST_PINNED;
> > + env->new_dst_cpu = cpu;
> > }
> >
> > return 0;
> > --
> > 2.43.0
> >
>
> --Nysal
> Can cpumask_first_and_and() be used instead?
That's a great idea, thank you!
That way we don't have to put a redundant cpumask on the stack. I'll
test it soon and post the test results.
--Richard
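
[Editor's note: a minimal sketch, not the actual v2 patch, of what the
change could look like with cpumask_first_and_and(), which returns the
first cpu set in the AND of all three masks and therefore needs no
temporary cpumask:]

	/* Prevent to re-select dst_cpu via env's CPUs: */
	cpu = cpumask_first_and_and(env->dst_grpmask, env->cpus, p->cpus_ptr);

	if (cpu < nr_cpu_ids) {
		env->flags |= LBF_DST_PINNED;
		env->new_dst_cpu = cpu;
	}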