[RFC PATCH] sched/fair: Refactor can_migrate_task() to elimate looping

I Hsin Cheng posted 1 patch 11 months, 2 weeks ago
There is a newer version of this series
kernel/sched/fair.c | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
[RFC PATCH] sched/fair: Refactor can_migrate_task() to elimate looping
Posted by I Hsin Cheng 11 months, 2 weeks ago
The function "can_migrate_task()" utilize "for_each_cpu_and" with a
"if" statement inside to find the destination cpu. It's the same logic
to find the first set bit of the result of the bitwise-AND of
"env->dst_grpmask", "env->cpus" and "p->cpus_ptr".

Refactor it by directly perform bitwise-AND for "env->dst_grpmask",
"env->cpus" and "p->cpus_ptr" and use "cpumask_first()" to select the
destination cpu, so we can elimate the need of looping and multiple
times of branch.

After the refactoring this part of the code can speed up from ~130ns to
~93ns, according to the test below.

Ran the test for 5 times and the result is showned in the following
table, and the test script is paste in next section.

-------------------------------------------------------
|Old method|  123|  135|  126|  129|  135|  avg ~130ns|
-------------------------------------------------------
|New method|   87|   97|   90|   92|   98|  avg  ~93ns|
-------------------------------------------------------

Signed-off-by: I Hsin Cheng <richard120310@gmail.com>
---
Test is done on Linux 6.8.0-48-generic x86_64 with
Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz

Test is executed in the form of kernel module.

Test script:

int init_module(void)
{
    struct cpumask cur_mask, custom_mask, result_mask;
    struct task_struct *p = current;
    int cpu, cpu1 = nr_cpu_ids, cpu2 = nr_cpu_ids;
    unsigned tmp = 0;

    cpumask_copy(&cur_mask, cpu_online_mask);
    /* Self-implemented function, didn't paste here because the length */
    generate_random_cpumask(&custom_mask);

    ktime_t start_1 = ktime_get();
    for_each_cpu_and(cpu, &cur_mask, &custom_mask) {
        if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
            /* imitate load balance operation */
            tmp |= 0x01010101;
            cpu1 = cpu;
            break;
        }
    }
    ktime_t end_1 = ktime_get();

    ktime_t start_2 = ktime_get();
    cpumask_and(&result_mask, &cur_mask, &custom_mask);
    cpumask_and(&result_mask, &result_mask, p->cpus_ptr);

    cpu = cpumask_first(&result_mask);

    if (cpu < nr_cpu_ids) {
        /* imitate load balance operation */
        tmp |= 0x01010101;
        cpu2 = cpu;
    }
    ktime_t end_2 = ktime_get();

    if (cpu1 != cpu2) {
        pr_err("Failed Assertion, cpu1 = %d, cpu2 = %d\n", cpu1, cpu2);
        return 0;
    }

    pr_info("Old method spend time : %lld\n", ktime_to_ns(end_1 - start_1));
    pr_info("New method spend time : %lld\n", ktime_to_ns(end_2 - start_2));

    return 0;
}
---
 kernel/sched/fair.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2d16c8545..ce46f61da 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
 			return 0;
 
 		/* Prevent to re-select dst_cpu via env's CPUs: */
-		for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
-			if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
-				env->flags |= LBF_DST_PINNED;
-				env->new_dst_cpu = cpu;
-				break;
-			}
+		struct cpumask dst_mask;
+
+		cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
+		cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
+
+		cpu = cpumask_first(&dst_mask);
+
+		if (cpu < nr_cpu_ids) {
+			env->flags |= LBF_DST_PINNED;
+			env->new_dst_cpu = cpu;
 		}
 
 		return 0;
-- 
2.43.0
Re: [RFC PATCH] sched/fair: Refactor can_migrate_task() to elimate looping
Posted by Peter Zijlstra 11 months, 2 weeks ago
On Thu, Jan 09, 2025 at 01:29:47AM +0800, I Hsin Cheng wrote:
>  kernel/sched/fair.c | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 2d16c8545..ce46f61da 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>  			return 0;
>  
>  		/* Prevent to re-select dst_cpu via env's CPUs: */
> -		for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
> -			if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
> -				env->flags |= LBF_DST_PINNED;
> -				env->new_dst_cpu = cpu;
> -				break;
> -			}
> +		struct cpumask dst_mask;

Except you cannot put cpumask on-stack...

> +
> +		cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
> +		cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
> +
> +		cpu = cpumask_first(&dst_mask);
> +
> +		if (cpu < nr_cpu_ids) {
> +			env->flags |= LBF_DST_PINNED;
> +			env->new_dst_cpu = cpu;
>  		}
>  
>  		return 0;
> -- 
> 2.43.0
>
Re: [RFC PATCH] sched/fair: Refactor can_migrate_task() to elimate looping
Posted by I Hsin Cheng 11 months, 2 weeks ago
On Thu, Jan 09, 2025 at 10:46:27AM +0100, Peter Zijlstra wrote:
> On Thu, Jan 09, 2025 at 01:29:47AM +0800, I Hsin Cheng wrote:
> >  kernel/sched/fair.c | 16 ++++++++++------
> >  1 file changed, 10 insertions(+), 6 deletions(-)
> > 
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index 2d16c8545..ce46f61da 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> >  			return 0;
> >  
> >  		/* Prevent to re-select dst_cpu via env's CPUs: */
> > -		for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
> > -			if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
> > -				env->flags |= LBF_DST_PINNED;
> > -				env->new_dst_cpu = cpu;
> > -				break;
> > -			}
> > +		struct cpumask dst_mask;
> 
> Except you cannot put cpumask on-stack...
> 
> > +
> > +		cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
> > +		cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
> > +
> > +		cpu = cpumask_first(&dst_mask);
> > +
> > +		if (cpu < nr_cpu_ids) {
> > +			env->flags |= LBF_DST_PINNED;
> > +			env->new_dst_cpu = cpu;
> >  		}
> >  
> >  		return 0;
> > -- 
> > 2.43.0
> >

> > Except you cannot put cpumask on-stack...

Oh I'm sorry, may I ask the reason? is it because cpumask tends to be
very large?
I assume we're not supposed to change the value of "env->dst_grpmask" or
"env->cpus" here, so in order to achieve something like this we need
another cpumask, explicitly allocate a cpumask on heap for this section
would be an overkill I think ?

Or maybe we can have kernel maintain a global cpumask so anyone wishing
to perform some operations will get to do it without maintain allocate a
cpumask themselves?
Re: [RFC PATCH] sched/fair: Refactor can_migrate_task() to elimate looping
Posted by Peter Zijlstra 11 months, 2 weeks ago
On Thu, Jan 09, 2025 at 06:12:22PM +0800, I Hsin Cheng wrote:

> > > Except you cannot put cpumask on-stack...
> 
> Oh I'm sorry, may I ask the reason? is it because cpumask tends to be
> very large?

Yes, when building with NR_CPUS=8192 (as Distos tend to do) it is 1K of
stack space.
Re: [RFC PATCH] sched/fair: Refactor can_migrate_task() to elimate looping
Posted by I Hsin Cheng 11 months, 2 weeks ago
On Thu, Jan 09, 2025 at 11:21:22AM +0100, Peter Zijlstra wrote:
> On Thu, Jan 09, 2025 at 06:12:22PM +0800, I Hsin Cheng wrote:
> 
> > > > Except you cannot put cpumask on-stack...
> > 
> > Oh I'm sorry, may I ask the reason? is it because cpumask tends to be
> > very large?
> 
> Yes, when building with NR_CPUS=8192 (as Distos tend to do) it is 1K of
> stack space.
>

Thanks for your explanation! I've made the requested changes using
"cpumask_first_and_and()" so there won't be additional cpumask needed.
Tests are done and I'm sending a v2 rfc patch.

-- Richard.
Re: [RFC PATCH] sched/fair: Refactor can_migrate_task() to elimate looping
Posted by Nysal Jan K.A. 11 months, 2 weeks ago
On Thu, Jan 09, 2025 at 01:29:47AM +0800, I Hsin Cheng wrote:
> @@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
>  			return 0;
>  
>  		/* Prevent to re-select dst_cpu via env's CPUs: */
> -		for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
> -			if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
> -				env->flags |= LBF_DST_PINNED;
> -				env->new_dst_cpu = cpu;
> -				break;
> -			}
> +		struct cpumask dst_mask;
> +
> +		cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
> +		cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
> +
> +		cpu = cpumask_first(&dst_mask);

Can cpumask_first_and_and() be used instead?

> +
> +		if (cpu < nr_cpu_ids) {
> +			env->flags |= LBF_DST_PINNED;
> +			env->new_dst_cpu = cpu;
>  		}
>  
>  		return 0;
> -- 
> 2.43.0
> 

--Nysal
Re: [RFC PATCH] sched/fair: Refactor can_migrate_task() to elimate looping
Posted by I Hsin Cheng 11 months, 2 weeks ago
On Thu, Jan 09, 2025 at 02:28:27PM +0530, Nysal Jan K.A. wrote:
> On Thu, Jan 09, 2025 at 01:29:47AM +0800, I Hsin Cheng wrote:
> > @@ -9404,12 +9404,16 @@ int can_migrate_task(struct task_struct *p, struct lb_env *env)
> >  			return 0;
> >  
> >  		/* Prevent to re-select dst_cpu via env's CPUs: */
> > -		for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
> > -			if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
> > -				env->flags |= LBF_DST_PINNED;
> > -				env->new_dst_cpu = cpu;
> > -				break;
> > -			}
> > +		struct cpumask dst_mask;
> > +
> > +		cpumask_and(&dst_mask, env->dst_grpmask, env->cpus);
> > +		cpumask_and(&dst_mask, &dst_mask, p->cpus_ptr);
> > +
> > +		cpu = cpumask_first(&dst_mask);
> 
> Can cpumask_first_and_and() be used instead?
> 
> > +
> > +		if (cpu < nr_cpu_ids) {
> > +			env->flags |= LBF_DST_PINNED;
> > +			env->new_dst_cpu = cpu;
> >  		}
> >  
> >  		return 0;
> > -- 
> > 2.43.0
> > 
> 
> --Nysal

> Can cpumask_first_and_and() be used instead?

That's a great idea! thank you.
That way we don't have to put redundant cpumask on stack, I'll test it
real soon and send the test result up.

--Richard