[PATCH] sched/fair: Fix cpu_util runnable_avg arithmetic

Hongyan Xia posted 1 patch 2 days, 22 hours ago
There is a newer version of this series
kernel/sched/fair.c | 22 ++++++++++++++--------
1 file changed, 14 insertions(+), 8 deletions(-)
[PATCH] sched/fair: Fix cpu_util runnable_avg arithmetic
Posted by Hongyan Xia 2 days, 22 hours ago
From: Hongyan Xia <hongyan.xia@transsion.com>

If we take runnable_avg in max(runnable_avg, util_avg) in cpu_util(), we
should then add or subtract task runnable_avg, but the arithmetic below
is still with task util_avg. This mixes runnable_avg with util_avg which
is incorrect.

Fix by always doing arithmetic with runnable_avg and only take
max(runnable_avg, util_avg) at the last step.

Fixes: 7d0583cf9ec7 ("sched/fair, cpufreq: Introduce 'runnable boosting'")
Signed-off-by: Hongyan Xia <hongyan.xia@transsion.com>
---
 kernel/sched/fair.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 69361c63353a..050bce06efea 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8231,25 +8231,31 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 static unsigned long
 cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
 {
+	bool add_task = p && task_cpu(p) != cpu && dst_cpu == cpu;
+	bool sub_task = p && task_cpu(p) == cpu && dst_cpu != cpu;
 	struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
 	unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
 	unsigned long runnable;
 
-	if (boost) {
-		runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
-		util = max(util, runnable);
-	}
-
 	/*
 	 * If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its
 	 * contribution. If @p migrates from another CPU to @cpu add its
 	 * contribution. In all the other cases @cpu is not impacted by the
 	 * migration so its util_avg is already correct.
 	 */
-	if (p && task_cpu(p) == cpu && dst_cpu != cpu)
-		lsub_positive(&util, task_util(p));
-	else if (p && task_cpu(p) != cpu && dst_cpu == cpu)
+	if (add_task)
 		util += task_util(p);
+	else if (sub_task)
+		lsub_positive(&util, task_util(p));
+
+	if (boost) {
+		runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
+		if (add_task)
+			runnable += task_runnable(p);
+		else if (sub_task)
+			lsub_positive(&runnable, task_runnable(p));
+		util = max(util, runnable);
+	}
 
 	if (sched_feat(UTIL_EST)) {
 		unsigned long util_est;
-- 
2.47.3

Re: [PATCH] sched/fair: Fix cpu_util runnable_avg arithmetic
Posted by Vincent Guittot 2 days, 20 hours ago
On Fri, 5 Jun 2026 at 09:01, Hongyan Xia <hongyan.xia@transsion.com> wrote:
>
> From: Hongyan Xia <hongyan.xia@transsion.com>
>
> If we take runnable_avg in max(runnable_avg, util_avg) in cpu_util(), we
> should then add or subtract task runnable_avg, but the arithmetic below
> is still with task util_avg. This mixes runnable_avg with util_avg which
> is incorrect.
>
> Fix by always doing arithmetic with runnable_avg and only take
> max(runnable_avg, util_avg) at the last step.
>
> Fixes: 7d0583cf9ec7 ("sched/fair, cpufreq: Introduce 'runnable boosting'")
> Signed-off-by: Hongyan Xia <hongyan.xia@transsion.com>
> ---
>  kernel/sched/fair.c | 22 ++++++++++++++--------
>  1 file changed, 14 insertions(+), 8 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 69361c63353a..050bce06efea 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8231,25 +8231,31 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>  static unsigned long
>  cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
>  {
> +       bool add_task = p && task_cpu(p) != cpu && dst_cpu == cpu;
> +       bool sub_task = p && task_cpu(p) == cpu && dst_cpu != cpu;
>         struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
>         unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
>         unsigned long runnable;
>
> -       if (boost) {
> -               runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
> -               util = max(util, runnable);
> -       }
> -
>         /*
>          * If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its
>          * contribution. If @p migrates from another CPU to @cpu add its
>          * contribution. In all the other cases @cpu is not impacted by the
>          * migration so its util_avg is already correct.
>          */
> -       if (p && task_cpu(p) == cpu && dst_cpu != cpu)
> -               lsub_positive(&util, task_util(p));
> -       else if (p && task_cpu(p) != cpu && dst_cpu == cpu)
> +       if (add_task)
>                 util += task_util(p);
> +       else if (sub_task)
> +               lsub_positive(&util, task_util(p));
> +
> +       if (boost) {
> +               runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
> +               if (add_task)
> +                       runnable += task_runnable(p);

Where did you get the task_runnable() function? I can't find it in
tip/sched/core so either create it or call
READ_ONCE(p->se.avg.runnable_avg) directly


> +               else if (sub_task)
> +                       lsub_positive(&runnable, task_runnable(p));
> +               util = max(util, runnable);
> +       }
>
>         if (sched_feat(UTIL_EST)) {
>                 unsigned long util_est;
> --
> 2.47.3
>
Re: [PATCH] sched/fair: Fix cpu_util runnable_avg arithmetic
Posted by Christian Loehle 2 days, 20 hours ago
On 6/5/26 09:51, Vincent Guittot wrote:
> On Fri, 5 Jun 2026 at 09:01, Hongyan Xia <hongyan.xia@transsion.com> wrote:
>>
>> From: Hongyan Xia <hongyan.xia@transsion.com>
>>
>> If we take runnable_avg in max(runnable_avg, util_avg) in cpu_util(), we
>> should then add or subtract task runnable_avg, but the arithmetic below
>> is still with task util_avg. This mixes runnable_avg with util_avg which
>> is incorrect.
>>
>> Fix by always doing arithmetic with runnable_avg and only take
>> max(runnable_avg, util_avg) at the last step.
>>
>> Fixes: 7d0583cf9ec7 ("sched/fair, cpufreq: Introduce 'runnable boosting'")
>> Signed-off-by: Hongyan Xia <hongyan.xia@transsion.com>
>> ---
>>  kernel/sched/fair.c | 22 ++++++++++++++--------
>>  1 file changed, 14 insertions(+), 8 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 69361c63353a..050bce06efea 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -8231,25 +8231,31 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>>  static unsigned long
>>  cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
>>  {
>> +       bool add_task = p && task_cpu(p) != cpu && dst_cpu == cpu;
>> +       bool sub_task = p && task_cpu(p) == cpu && dst_cpu != cpu;
>>         struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
>>         unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
>>         unsigned long runnable;
>>
>> -       if (boost) {
>> -               runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
>> -               util = max(util, runnable);
>> -       }
>> -
>>         /*
>>          * If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its
>>          * contribution. If @p migrates from another CPU to @cpu add its
>>          * contribution. In all the other cases @cpu is not impacted by the
>>          * migration so its util_avg is already correct.
>>          */
>> -       if (p && task_cpu(p) == cpu && dst_cpu != cpu)
>> -               lsub_positive(&util, task_util(p));
>> -       else if (p && task_cpu(p) != cpu && dst_cpu == cpu)
>> +       if (add_task)
>>                 util += task_util(p);
>> +       else if (sub_task)
>> +               lsub_positive(&util, task_util(p));
>> +
>> +       if (boost) {
>> +               runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
>> +               if (add_task)
>> +                       runnable += task_runnable(p);
> 
> Where did you get the task_runnable() function? I can't find it in
> tip/sched/core so either create it or call
> READ_ONCE(p->se.avg.runnable_avg) directly

It's there for me on tip/sched/core at
7675361ff9a1 sched: deadline: Cleanup goto label in pick_earliest_pushable_dl_task
but also 7.1-rc6?

> 
> 
>> +               else if (sub_task)
>> +                       lsub_positive(&runnable, task_runnable(p));
>> +               util = max(util, runnable);
>> +       }
>>
>>         if (sched_feat(UTIL_EST)) {
>>                 unsigned long util_est;
>> --
>> 2.47.3
>>
>
Re: [PATCH] sched/fair: Fix cpu_util runnable_avg arithmetic
Posted by Christian Loehle 2 days, 20 hours ago
On 6/5/26 10:21, Christian Loehle wrote:
> On 6/5/26 09:51, Vincent Guittot wrote:
>> On Fri, 5 Jun 2026 at 09:01, Hongyan Xia <hongyan.xia@transsion.com> wrote:
>>>
>>> From: Hongyan Xia <hongyan.xia@transsion.com>
>>>
>>> If we take runnable_avg in max(runnable_avg, util_avg) in cpu_util(), we
>>> should then add or subtract task runnable_avg, but the arithmetic below
>>> is still with task util_avg. This mixes runnable_avg with util_avg which
>>> is incorrect.
>>>
>>> Fix by always doing arithmetic with runnable_avg and only take
>>> max(runnable_avg, util_avg) at the last step.
>>>
>>> Fixes: 7d0583cf9ec7 ("sched/fair, cpufreq: Introduce 'runnable boosting'")
>>> Signed-off-by: Hongyan Xia <hongyan.xia@transsion.com>
>>> ---
>>>  kernel/sched/fair.c | 22 ++++++++++++++--------
>>>  1 file changed, 14 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>>> index 69361c63353a..050bce06efea 100644
>>> --- a/kernel/sched/fair.c
>>> +++ b/kernel/sched/fair.c
>>> @@ -8231,25 +8231,31 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
>>>  static unsigned long
>>>  cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
>>>  {
>>> +       bool add_task = p && task_cpu(p) != cpu && dst_cpu == cpu;
>>> +       bool sub_task = p && task_cpu(p) == cpu && dst_cpu != cpu;
>>>         struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
>>>         unsigned long util = READ_ONCE(cfs_rq->avg.util_avg);
>>>         unsigned long runnable;
>>>
>>> -       if (boost) {
>>> -               runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
>>> -               util = max(util, runnable);
>>> -       }
>>> -
>>>         /*
>>>          * If @dst_cpu is -1 or @p migrates from @cpu to @dst_cpu remove its
>>>          * contribution. If @p migrates from another CPU to @cpu add its
>>>          * contribution. In all the other cases @cpu is not impacted by the
>>>          * migration so its util_avg is already correct.
>>>          */
>>> -       if (p && task_cpu(p) == cpu && dst_cpu != cpu)
>>> -               lsub_positive(&util, task_util(p));
>>> -       else if (p && task_cpu(p) != cpu && dst_cpu == cpu)
>>> +       if (add_task)
>>>                 util += task_util(p);
>>> +       else if (sub_task)
>>> +               lsub_positive(&util, task_util(p));
>>> +
>>> +       if (boost) {
>>> +               runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
>>> +               if (add_task)
>>> +                       runnable += task_runnable(p);
>>
>> Where did you get the task_runnable() function? I can't find it in
>> tip/sched/core so either create it or call
>> READ_ONCE(p->se.avg.runnable_avg) directly
> 
> It's there for me on tip/sched/core at
> 7675361ff9a1 sched: deadline: Cleanup goto label in pick_earliest_pushable_dl_task
> but also 7.1-rc6?

nevermind....