kernel/sched/fair.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-)
In cpu_util_without, When the task is in rq, we should
sub the task's util_est, however, the delayed_task->on_rq
is true, however, the delayed_task's util had been sub
when sleep, so there is no need to sub the delayed task's
util-est. So add the checking of delayed-task.
On the other hand, as said in [1], the logic of util_est's
enqueue/dequeue could be simplified.
So simplify it by aligning with the conditions of uclamp.
[1]https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
---
kernel/sched/fair.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c798d2795243..bebf40a0fa4e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
* Let's add the task's estimated utilization to the cfs_rq's
* estimated utilization, before we update schedutil.
*/
- if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
+ if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
util_est_enqueue(&rq->cfs, p);
if (flags & ENQUEUE_DELAYED) {
@@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
*/
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
- if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
+ if (!p->se.sched_delayed)
util_est_dequeue(&rq->cfs, p);
util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
@@ -8037,7 +8037,8 @@ cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
*/
if (dst_cpu == cpu)
util_est += _task_util_est(p);
- else if (p && unlikely(task_on_rq_queued(p) || current == p))
+ else if (p && unlikely(current == p ||
+ (task_on_rq_queued(p) && !p->se.sched_delayed)))
lsub_positive(&util_est, _task_util_est(p));
util = max(util, util_est);
--
2.25.1
On 14/03/2025 09:09, Xuewen Yan wrote:
> In cpu_util_without, When the task is in rq, we should
> sub the task's util_est, however, the delayed_task->on_rq
> is true, however, the delayed_task's util had been sub
> when sleep, so there is no need to sub the delayed task's
> util-est. So add the checking of delayed-task.
>
> On the other hand, as said in [1], the logic of util_est's
> enqueue/dequeue could be simplified.
> So simplify it by aligning with the conditions of uclamp.
>
> [1]https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
>
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> ---
> kernel/sched/fair.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c798d2795243..bebf40a0fa4e 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> * Let's add the task's estimated utilization to the cfs_rq's
> * estimated utilization, before we update schedutil.
> */
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> util_est_enqueue(&rq->cfs, p);
>
> if (flags & ENQUEUE_DELAYED) {
> @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> */
> static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> + if (!p->se.sched_delayed)
> util_est_dequeue(&rq->cfs, p);
>
> util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
> @@ -8037,7 +8037,8 @@ cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
> */
> if (dst_cpu == cpu)
> util_est += _task_util_est(p);
> - else if (p && unlikely(task_on_rq_queued(p) || current == p))
> + else if (p && unlikely(current == p ||
> + (task_on_rq_queued(p) && !p->se.sched_delayed)))
> lsub_positive(&util_est, _task_util_est(p));
>
> util = max(util, util_est);
Tested this patch on several workloads and added util_est warnings. No
util_est overflow or underflow warnings were seen.
Tested-by: Hongyan Xia <hongyan.xia2@arm.com>
On 14/04/2025 15:39, Hongyan Xia wrote:
> On 14/03/2025 09:09, Xuewen Yan wrote:
>> In cpu_util_without, When the task is in rq, we should
>> sub the task's util_est, however, the delayed_task->on_rq
>> is true, however, the delayed_task's util had been sub
>> when sleep, so there is no need to sub the delayed task's
>> util-est. So add the checking of delayed-task.
>>
>> On the other hand, as said in [1], the logic of util_est's
>> enqueue/dequeue could be simplified.
>> So simplify it by aligning with the conditions of uclamp.
>>
>> [1]https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
>>
>> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
>> ---
>> kernel/sched/fair.c | 7 ++++---
>> 1 file changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index c798d2795243..bebf40a0fa4e 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct
>> task_struct *p, int flags)
>> * Let's add the task's estimated utilization to the cfs_rq's
>> * estimated utilization, before we update schedutil.
>> */
>> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags &
>> ENQUEUE_RESTORE))))
>> + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
>> util_est_enqueue(&rq->cfs, p);
>> if (flags & ENQUEUE_DELAYED) {
>> @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq,
>> struct sched_entity *se, int flags)
>> */
>> static bool dequeue_task_fair(struct rq *rq, struct task_struct *p,
>> int flags)
>> {
>> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags &
>> DEQUEUE_SAVE))))
>> + if (!p->se.sched_delayed)
>> util_est_dequeue(&rq->cfs, p);
>> util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
>> @@ -8037,7 +8037,8 @@ cpu_util(int cpu, struct task_struct *p, int
>> dst_cpu, int boost)
>> */
>> if (dst_cpu == cpu)
>> util_est += _task_util_est(p);
>> - else if (p && unlikely(task_on_rq_queued(p) || current == p))
>> + else if (p && unlikely(current == p ||
>> + (task_on_rq_queued(p) && !p->se.sched_delayed)))
>> lsub_positive(&util_est, _task_util_est(p));
>> util = max(util, util_est);
>
> Tested this patch on several workloads and added util_est warnings. No
> util_est overflow or underflow warnings were seen.
>
> Tested-by: Hongyan Xia <hongyan.xia2@arm.com>
Just to make sure, does this 'Tested-by' also apply to the current v2
version of this patch?
https://lkml.kernel.org/r/20250325014733.18405-1-xuewen.yan@unisoc.com
In this case Xuewen should apply it on his next version.
On 14/03/2025 10:09, Xuewen Yan wrote: > In cpu_util_without, When the task is in rq, we should > sub the task's util_est, however, the delayed_task->on_rq > is true, however, the delayed_task's util had been sub > when sleep, so there is no need to sub the delayed task's > util-est. So add the checking of delayed-task. > > On the other hand, as said in [1], the logic of util_est's > enqueue/dequeue could be simplified. > So simplify it by aligning with the conditions of uclamp. This flag simplification looks good to me. IMHO, you should submit this with the uclamp change so that we can call uclamp_rq_inc() before p->sched_class->enqueue_task(). To make sure the task which is enqueued with 'flags & ENQUEUE_DELAYED' is considered for cpufreq_update_util() in enqueue_task_fair() (Hongyan's finding in https://lkml.kernel.org/r/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com) I would prefer the less invasive fix you presented here: https://lkml.kernel.org/r/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com since uclamp is already a core thing (fair + rt), it works for current max aggregation and it's less invasive. [...] > @@ -8037,7 +8037,8 @@ cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost) > */ > if (dst_cpu == cpu) > util_est += _task_util_est(p); > - else if (p && unlikely(task_on_rq_queued(p) || current == p)) > + else if (p && unlikely(current == p || > + (task_on_rq_queued(p) && !p->se.sched_delayed))) > lsub_positive(&util_est, _task_util_est(p)); cpu_util(..., p != NULL, ...) is only used for select_task_rq_fair(). IMHO p->se.sched_delayed is not set there.
Hi hongyan On Wed, Mar 19, 2025 at 9:33 PM Dietmar Eggemann <dietmar.eggemann@arm.com> wrote: > > On 14/03/2025 10:09, Xuewen Yan wrote: > > In cpu_util_without, When the task is in rq, we should > > sub the task's util_est, however, the delayed_task->on_rq > > is true, however, the delayed_task's util had been sub > > when sleep, so there is no need to sub the delayed task's > > util-est. So add the checking of delayed-task. > > > > On the other hand, as said in [1], the logic of util_est's > > enqueue/dequeue could be simplified. > > So simplify it by aligning with the conditions of uclamp. > > This flag simplification looks good to me. > > IMHO, you should submit this with the uclamp change so that we can call > uclamp_rq_inc() before p->sched_class->enqueue_task(). To make sure the > task which is enqueued with 'flags & ENQUEUE_DELAYED' is considered for > cpufreq_update_util() in enqueue_task_fair() (Hongyan's finding in > https://lkml.kernel.org/r/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com) > > I would prefer the less invasive fix you presented here: > > https://lkml.kernel.org/r/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com > > since uclamp is already a core thing (fair + rt), it works for current > max aggregation and it's less invasive. > Since the uclamp's enqueue is affecting performance, could we fix the uclamp-enqueue issue first? Need I to send the patch-v2 base the https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#mb32d7675beda5cadc35a3c04cddebc39580c718b ? As for whether we need to distinguish sched-class, can we discuss that later? Thanks! > [...] > > > @@ -8037,7 +8037,8 @@ cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost) > > */ > > if (dst_cpu == cpu) > > util_est += _task_util_est(p); > > - else if (p && unlikely(task_on_rq_queued(p) || current == p)) > > + else if (p && unlikely(current == p || > > + (task_on_rq_queued(p) && !p->se.sched_delayed))) > > lsub_positive(&util_est, _task_util_est(p)); > > cpu_util(..., p != NULL, ...) is only used for select_task_rq_fair(). > IMHO p->se.sched_delayed is not set there. Hi Dietmar, You're right, there's really no need to add extra conditions here at the moment. Thanks!
On 21/03/2025 07:44, Xuewen Yan wrote: > Hi hongyan > > On Wed, Mar 19, 2025 at 9:33 PM Dietmar Eggemann > <dietmar.eggemann@arm.com> wrote: >> >> On 14/03/2025 10:09, Xuewen Yan wrote: >>> In cpu_util_without, When the task is in rq, we should >>> sub the task's util_est, however, the delayed_task->on_rq >>> is true, however, the delayed_task's util had been sub >>> when sleep, so there is no need to sub the delayed task's >>> util-est. So add the checking of delayed-task. >>> >>> On the other hand, as said in [1], the logic of util_est's >>> enqueue/dequeue could be simplified. >>> So simplify it by aligning with the conditions of uclamp. >> >> This flag simplification looks good to me. >> >> IMHO, you should submit this with the uclamp change so that we can call >> uclamp_rq_inc() before p->sched_class->enqueue_task(). To make sure the >> task which is enqueued with 'flags & ENQUEUE_DELAYED' is considered for >> cpufreq_update_util() in enqueue_task_fair() (Hongyan's finding in >> https://lkml.kernel.org/r/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com) >> >> I would prefer the less invasive fix you presented here: >> >> https://lkml.kernel.org/r/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com >> >> since uclamp is already a core thing (fair + rt), it works for current >> max aggregation and it's less invasive. >> > > Since the uclamp's enqueue is affecting performance, could we fix the > uclamp-enqueue issue first? > Need I to send the patch-v2 base the > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#mb32d7675beda5cadc35a3c04cddebc39580c718b > ? That would be nice. Maybe a patch-set with 1) uclamp enqueue fix and 2) util_est flag simplification? So those two things stay together ... [...]
On 21/03/2025 06:44, Xuewen Yan wrote: > Hi hongyan > > On Wed, Mar 19, 2025 at 9:33 PM Dietmar Eggemann > <dietmar.eggemann@arm.com> wrote: >> >> On 14/03/2025 10:09, Xuewen Yan wrote: >>> In cpu_util_without, When the task is in rq, we should >>> sub the task's util_est, however, the delayed_task->on_rq >>> is true, however, the delayed_task's util had been sub >>> when sleep, so there is no need to sub the delayed task's >>> util-est. So add the checking of delayed-task. >>> >>> On the other hand, as said in [1], the logic of util_est's >>> enqueue/dequeue could be simplified. >>> So simplify it by aligning with the conditions of uclamp. >> >> This flag simplification looks good to me. >> >> IMHO, you should submit this with the uclamp change so that we can call >> uclamp_rq_inc() before p->sched_class->enqueue_task(). To make sure the >> task which is enqueued with 'flags & ENQUEUE_DELAYED' is considered for >> cpufreq_update_util() in enqueue_task_fair() (Hongyan's finding in >> https://lkml.kernel.org/r/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com) >> >> I would prefer the less invasive fix you presented here: >> >> https://lkml.kernel.org/r/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com >> >> since uclamp is already a core thing (fair + rt), it works for current >> max aggregation and it's less invasive. >> > > Since the uclamp's enqueue is affecting performance, could we fix the > uclamp-enqueue issue first? > Need I to send the patch-v2 base the > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#mb32d7675beda5cadc35a3c04cddebc39580c718b > ? > As for whether we need to distinguish sched-class, can we discuss that later? > > Thanks! > Sure. > [...]
On 14/03/2025 09:09, Xuewen Yan wrote:
> In cpu_util_without, When the task is in rq, we should
> sub the task's util_est, however, the delayed_task->on_rq
> is true, however, the delayed_task's util had been sub
> when sleep, so there is no need to sub the delayed task's
> util-est. So add the checking of delayed-task.
>
> On the other hand, as said in [1], the logic of util_est's
> enqueue/dequeue could be simplified.
> So simplify it by aligning with the conditions of uclamp.
>
> [1]https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
>
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> ---
> kernel/sched/fair.c | 7 ++++---
> 1 file changed, 4 insertions(+), 3 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c798d2795243..bebf40a0fa4e 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> * Let's add the task's estimated utilization to the cfs_rq's
> * estimated utilization, before we update schedutil.
> */
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> util_est_enqueue(&rq->cfs, p);
>
> if (flags & ENQUEUE_DELAYED) {
> @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> */
> static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> + if (!p->se.sched_delayed)
> util_est_dequeue(&rq->cfs, p);
>
> util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
> @@ -8037,7 +8037,8 @@ cpu_util(int cpu, struct task_struct *p, int dst_cpu, int boost)
> */
> if (dst_cpu == cpu)
> util_est += _task_util_est(p);
> - else if (p && unlikely(task_on_rq_queued(p) || current == p))
> + else if (p && unlikely(current == p ||
> + (task_on_rq_queued(p) && !p->se.sched_delayed)))
> lsub_positive(&util_est, _task_util_est(p));
>
> util = max(util, util_est);
Have you tested the above changes to make sure util_est enqueue dequeue
are balanced? util_est was broken for quite a while when merging delayed
dequeue because now enqueue_ and dequeue_task() do not always appear in
pairs. Since then, I always have a local patch like this (may be a bit
out of date now) to make sure util_est is balanced
https://lore.kernel.org/all/752ae417c02b9277ca3ec18893747c54dd5f277f.1724245193.git.hongyan.xia2@arm.com/
Hi Hongyan On Fri, Mar 14, 2025 at 9:14 PM Hongyan Xia <hongyan.xia2@arm.com> wrote: > Have you tested the above changes to make sure util_est enqueue dequeue > are balanced? util_est was broken for quite a while when merging delayed > dequeue because now enqueue_ and dequeue_task() do not always appear in > pairs. Since then, I always have a local patch like this (may be a bit > out of date now) to make sure util_est is balanced > > https://lore.kernel.org/all/752ae417c02b9277ca3ec18893747c54dd5f277f.1724245193.git.hongyan.xia2@arm.com/ I use your patch and test the patch in kernel-6.12, I haven't seen any corresponding alarm information yet.
© 2016 - 2025 Red Hat, Inc.