kernel/sched/core.c | 17 ++++++++++------- kernel/sched/fair.c | 4 ++-- 2 files changed, 12 insertions(+), 9 deletions(-)
When task's uclamp is set, we hope that the CPU frequency
can increase as quickly as possible when the task is enqueued.
Because the CPU frequency update happens during enqueue_task(),
the rq's uclamp needs to be updated before the task is enqueued,
just like util_est.
So, align the uclamp and util_est updates and call them before the frequency update.
For sched-delayed tasks, the rq uclamp/util_est should only be updated
when they are enqueued upon being awakened.
So simplify the logic of util_est's enqueue/dequeue check.
Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
---
v2:
- simplify the util-est's en/dequeue check;
---
Previous discussion:
https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
---
kernel/sched/core.c | 17 ++++++++++-------
kernel/sched/fair.c | 4 ++--
2 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 042351c7afce..72fbe2031e54 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
}
}
-static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
+static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
{
enum uclamp_id clamp_id;
@@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
if (unlikely(!p->sched_class->uclamp_enabled))
return;
- if (p->se.sched_delayed)
+ /* Only inc the delayed task which is being woken up. */
+ if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
return;
for_each_clamp_id(clamp_id)
@@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
}
#else /* !CONFIG_UCLAMP_TASK */
-static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
+static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
static inline void uclamp_fork(struct task_struct *p) { }
static inline void uclamp_post_fork(struct task_struct *p) { }
@@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
if (!(flags & ENQUEUE_NOCLOCK))
update_rq_clock(rq);
- p->sched_class->enqueue_task(rq, p, flags);
/*
- * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
- * ->sched_delayed.
+ * Can be before ->enqueue_task() because uclamp considers the
+ * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
+ * in ->enqueue_task().
*/
- uclamp_rq_inc(rq, p);
+ uclamp_rq_inc(rq, p, flags);
+
+ p->sched_class->enqueue_task(rq, p, flags);
psi_enqueue(p, flags);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c798d2795243..c92fee07fb7b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
* Let's add the task's estimated utilization to the cfs_rq's
* estimated utilization, before we update schedutil.
*/
- if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
+ if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
util_est_enqueue(&rq->cfs, p);
if (flags & ENQUEUE_DELAYED) {
@@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
*/
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
- if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
+ if (!p->se.sched_delayed)
util_est_dequeue(&rq->cfs, p);
util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
--
2.25.1
On Tue, 2025-03-25 at 09:47 +0800, Xuewen Yan wrote:
> When task's uclamp is set, we hope that the CPU frequency
> can increase as quickly as possible when the task is enqueued.
> Because the cpu frequency updating happens during the enqueue_task(),
> so the rq's uclamp needs to be updated before the task is enqueued,
> just like util_est.
> So, aline the uclamp and util_est and call before freq update.
>
> For sched-delayed tasks, the rq uclamp/util_est should only be
> updated
> when they are enqueued upon being awakened.
> So simply the logic of util_est's enqueue/dequeue check.
>
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> ---
> v2:
> - simply the util-est's en/dequeue check;
> ---
> Previous discussion:
> https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> ---
> kernel/sched/core.c | 17 ++++++++++-------
> kernel/sched/fair.c | 4 ++--
> 2 files changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 042351c7afce..72fbe2031e54 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq
> *rq, struct task_struct *p,
> }
> }
>
> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct
> *p)
> +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct
> *p, int flags)
> {
> enum uclamp_id clamp_id;
>
> @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq,
> struct task_struct *p)
> if (unlikely(!p->sched_class->uclamp_enabled))
> return;
>
> - if (p->se.sched_delayed)
> + /* Only inc the delayed task which being woken up. */
> + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> return;
>
> for_each_clamp_id(clamp_id)
> @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> }
>
> #else /* !CONFIG_UCLAMP_TASK */
> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct
> *p) { }
> +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct
> *p, int flags) { }
> static inline void uclamp_rq_dec(struct rq *rq, struct task_struct
> *p) { }
> static inline void uclamp_fork(struct task_struct *p) { }
> static inline void uclamp_post_fork(struct task_struct *p) { }
> @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct
> task_struct *p, int flags)
> if (!(flags & ENQUEUE_NOCLOCK))
> update_rq_clock(rq);
>
> - p->sched_class->enqueue_task(rq, p, flags);
> /*
> - * Must be after ->enqueue_task() because ENQUEUE_DELAYED
> can clear
> - * ->sched_delayed.
> + * Can be before ->enqueue_task() because uclamp considers
> the
> + * ENQUEUE_DELAYED task before its ->sched_delayed gets
> cleared
> + * in ->enqueue_task().
> */
> - uclamp_rq_inc(rq, p);
> + uclamp_rq_inc(rq, p, flags);
> +
> + p->sched_class->enqueue_task(rq, p, flags);
>
> psi_enqueue(p, flags);
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c798d2795243..c92fee07fb7b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct
> task_struct *p, int flags)
> * Let's add the task's estimated utilization to the
> cfs_rq's
> * estimated utilization, before we update schedutil.
> */
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) ||
> (flags & ENQUEUE_RESTORE))))
> + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> util_est_enqueue(&rq->cfs, p);
>
> if (flags & ENQUEUE_DELAYED) {
> @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq,
> struct sched_entity *se, int flags)
> */
> static bool dequeue_task_fair(struct rq *rq, struct task_struct *p,
> int flags)
> {
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) ||
> (flags & DEQUEUE_SAVE))))
> + if (!p->se.sched_delayed)
> util_est_dequeue(&rq->cfs, p);
>
> util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
The HW composer detects the frame status and uses uclamp to adjust the
CPU frequency for specific tasks.
After applying this patch, there is a significant improvement in the
performance of the dispatch system.
==================================================================
Test : FHD(1920x1080) video playback 30fps in 60hz frame rate.
Units : jank(frame drop)
Interpretation: lower is better
Statistic : AMean in one minutes
==================================================================
asis with patch
6-janks 0-janks
On Tue, 25 Mar 2025 at 02:48, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
>
> When task's uclamp is set, we hope that the CPU frequency
> can increase as quickly as possible when the task is enqueued.
> Because the cpu frequency updating happens during the enqueue_task(),
Strictly speaking, it doesn't happen during enqueue_task but when :
- attach/detach tasks when migrating
- update_load_avg decayed
- io_wait
This often happens during an enqueue but not always ...
> so the rq's uclamp needs to be updated before the task is enqueued,
this doesn't ensure that new rq's uclamp will be taken into account
> just like util_est.
just like util_est
> So, aline the uclamp and util_est and call before freq update.
nit s/aline/align/ ?
>
> For sched-delayed tasks, the rq uclamp/util_est should only be updated
> when they are enqueued upon being awakened.
> So simply the logic of util_est's enqueue/dequeue check.
>
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> ---
> v2:
> - simply the util-est's en/dequeue check;
> ---
> Previous discussion:
> https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> ---
> kernel/sched/core.c | 17 ++++++++++-------
> kernel/sched/fair.c | 4 ++--
> 2 files changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 042351c7afce..72fbe2031e54 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> }
> }
>
> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
> {
> enum uclamp_id clamp_id;
>
> @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> if (unlikely(!p->sched_class->uclamp_enabled))
> return;
>
> - if (p->se.sched_delayed)
> + /* Only inc the delayed task which being woken up. */
> + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> return;
>
> for_each_clamp_id(clamp_id)
> @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> }
>
> #else /* !CONFIG_UCLAMP_TASK */
> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
> static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> static inline void uclamp_fork(struct task_struct *p) { }
> static inline void uclamp_post_fork(struct task_struct *p) { }
> @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> if (!(flags & ENQUEUE_NOCLOCK))
> update_rq_clock(rq);
>
> - p->sched_class->enqueue_task(rq, p, flags);
> /*
> - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> - * ->sched_delayed.
> + * Can be before ->enqueue_task() because uclamp considers the
> + * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
> + * in ->enqueue_task().
> */
> - uclamp_rq_inc(rq, p);
> + uclamp_rq_inc(rq, p, flags);
> +
> + p->sched_class->enqueue_task(rq, p, flags);
>
> psi_enqueue(p, flags);
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c798d2795243..c92fee07fb7b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> * Let's add the task's estimated utilization to the cfs_rq's
> * estimated utilization, before we update schedutil.
> */
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
commit message doesn't explain why you change util_est condition
> util_est_enqueue(&rq->cfs, p);
>
> if (flags & ENQUEUE_DELAYED) {
> @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> */
> static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> + if (!p->se.sched_delayed)
same here, you should explain in commit message why it's okay to do so
> util_est_dequeue(&rq->cfs, p);
>
> util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
> --
> 2.25.1
>
On Wed, Apr 16, 2025 at 1:05 AM Vincent Guittot
<vincent.guittot@linaro.org> wrote:
>
> On Tue, 25 Mar 2025 at 02:48, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
> >
> > When task's uclamp is set, we hope that the CPU frequency
> > can increase as quickly as possible when the task is enqueued.
> > Because the cpu frequency updating happens during the enqueue_task(),
>
> Strictly speaking, it doesn't happen during enqueue_task but when :
> - attach/detach tasks when migrating
> - update_load_avg decayed
> - io_wait
>
> This often happens during an enqueue but not always ...
Okay, I would make some adjustments to these descriptions.
>
> > so the rq's uclamp needs to be updated before the task is enqueued,
>
> this doesn't ensure that new rq's uclamp will be taken into account
Did I miss something?
As following stack:
enqueue_task_fair()
update_load_avg()
cfs_rq_util_change(cfs_rq, 0);
cpufreq_update_util()
sugov_update_shared()
sugov_next_freq_shared()
sugov_get_util()
effective_cpu_util()
*min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN));
*max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX));
So, the rq's uclamp value should update before enqueue_task().
>
> > just like util_est.
>
> just like util_est
>
> > So, aline the uclamp and util_est and call before freq update.
>
> nit s/aline/align/ ?
align.
>
> >
> > For sched-delayed tasks, the rq uclamp/util_est should only be updated
> > when they are enqueued upon being awakened.
> > So simply the logic of util_est's enqueue/dequeue check.
> >
> > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> > ---
> > v2:
> > - simply the util-est's en/dequeue check;
> > ---
> > Previous discussion:
> > https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> > https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> > https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> > ---
> > kernel/sched/core.c | 17 ++++++++++-------
> > kernel/sched/fair.c | 4 ++--
> > 2 files changed, 12 insertions(+), 9 deletions(-)
> >
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 042351c7afce..72fbe2031e54 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> > }
> > }
> >
> > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
> > {
> > enum uclamp_id clamp_id;
> >
> > @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > if (unlikely(!p->sched_class->uclamp_enabled))
> > return;
> >
> > - if (p->se.sched_delayed)
> > + /* Only inc the delayed task which being woken up. */
> > + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> > return;
> >
> > for_each_clamp_id(clamp_id)
> > @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> > }
> >
> > #else /* !CONFIG_UCLAMP_TASK */
> > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
> > static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> > static inline void uclamp_fork(struct task_struct *p) { }
> > static inline void uclamp_post_fork(struct task_struct *p) { }
> > @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> > if (!(flags & ENQUEUE_NOCLOCK))
> > update_rq_clock(rq);
> >
> > - p->sched_class->enqueue_task(rq, p, flags);
> > /*
> > - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> > - * ->sched_delayed.
> > + * Can be before ->enqueue_task() because uclamp considers the
> > + * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
> > + * in ->enqueue_task().
> > */
> > - uclamp_rq_inc(rq, p);
> > + uclamp_rq_inc(rq, p, flags);
> > +
> > + p->sched_class->enqueue_task(rq, p, flags);
> >
> > psi_enqueue(p, flags);
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index c798d2795243..c92fee07fb7b 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > * Let's add the task's estimated utilization to the cfs_rq's
> > * estimated utilization, before we update schedutil.
> > */
> > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> > + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
>
> commit message doesn't explain why you change util_est condition
Because the sched_delayed flag is set in dequeue_entity() and cleared
after this condition, for migrating/prio_change we can just check
sched_delayed.
And for the wakeup case, because the sched_delayed flag is cleared after
this point, we use the ENQUEUE_DELAYED flag to ensure util_est is enqueued.
>
> > util_est_enqueue(&rq->cfs, p);
> >
> > if (flags & ENQUEUE_DELAYED) {
> > @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> > */
> > static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > {
> > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> > + if (!p->se.sched_delayed)
>
> same here, you should explain in commit message why it's okay to do so
Same as above: the sched_delayed flag is set in dequeue_entity(), so at
this point sched_delayed is not set for a sleeping task. If the flag is
set, it indicates that the task was migrating or changing priority.
By the way, I will kindly update these reasons in the commit message.
Thanks!
BR
---
xuewen
On Wed, 16 Apr 2025 at 04:55, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
>
> On Wed, Apr 16, 2025 at 1:05 AM Vincent Guittot
> <vincent.guittot@linaro.org> wrote:
> >
> > On Tue, 25 Mar 2025 at 02:48, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
> > >
> > > When task's uclamp is set, we hope that the CPU frequency
> > > can increase as quickly as possible when the task is enqueued.
> > > Because the cpu frequency updating happens during the enqueue_task(),
> >
> > Strictly speaking, it doesn't happen during enqueue_task but when :
> > - attach/detach tasks when migrating
> > - update_load_avg decayed
> > - io_wait
> >
> > This often happens during an enqueue but not always ...
>
> Okay, I would make some adjustments to these descriptions.
>
> >
> > > so the rq's uclamp needs to be updated before the task is enqueued,
> >
> > this doesn't ensure that new rq's uclamp will be taken into account
>
> Did I miss something?
>
> As following stack:
> enqueue_task_fair()
> update_load_avg()
> cfs_rq_util_change(cfs_rq, 0);
As mentioned above, this doesn't always happen, so you are not
guaranteed that uclamp is taken into account. If you require the uclamp
value to be taken into account immediately, this is not enough.
> cpufreq_update_util()
> sugov_update_shared()
> sugov_next_freq_shared()
> sugov_get_util()
> effective_cpu_util()
> *min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN));
> *max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX));
>
> So, the rq's uclamp value should update before enqueue_task().
> >
> > > just like util_est.
> >
> > just like util_est
> >
> > > So, aline the uclamp and util_est and call before freq update.
> >
> > nit s/aline/align/ ?
> align.
> >
> > >
> > > For sched-delayed tasks, the rq uclamp/util_est should only be updated
> > > when they are enqueued upon being awakened.
> > > So simply the logic of util_est's enqueue/dequeue check.
> > >
> > > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> > > ---
> > > v2:
> > > - simply the util-est's en/dequeue check;
> > > ---
> > > Previous discussion:
> > > https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> > > https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> > > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> > > https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> > > ---
> > > kernel/sched/core.c | 17 ++++++++++-------
> > > kernel/sched/fair.c | 4 ++--
> > > 2 files changed, 12 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > index 042351c7afce..72fbe2031e54 100644
> > > --- a/kernel/sched/core.c
> > > +++ b/kernel/sched/core.c
> > > @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> > > }
> > > }
> > >
> > > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
> > > {
> > > enum uclamp_id clamp_id;
> > >
> > > @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > > if (unlikely(!p->sched_class->uclamp_enabled))
> > > return;
> > >
> > > - if (p->se.sched_delayed)
> > > + /* Only inc the delayed task which being woken up. */
> > > + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> > > return;
> > >
> > > for_each_clamp_id(clamp_id)
> > > @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> > > }
> > >
> > > #else /* !CONFIG_UCLAMP_TASK */
> > > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> > > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
> > > static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> > > static inline void uclamp_fork(struct task_struct *p) { }
> > > static inline void uclamp_post_fork(struct task_struct *p) { }
> > > @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> > > if (!(flags & ENQUEUE_NOCLOCK))
> > > update_rq_clock(rq);
> > >
> > > - p->sched_class->enqueue_task(rq, p, flags);
> > > /*
> > > - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> > > - * ->sched_delayed.
> > > + * Can be before ->enqueue_task() because uclamp considers the
> > > + * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
> > > + * in ->enqueue_task().
> > > */
> > > - uclamp_rq_inc(rq, p);
> > > + uclamp_rq_inc(rq, p, flags);
> > > +
> > > + p->sched_class->enqueue_task(rq, p, flags);
> > >
> > > psi_enqueue(p, flags);
> > >
> > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > > index c798d2795243..c92fee07fb7b 100644
> > > --- a/kernel/sched/fair.c
> > > +++ b/kernel/sched/fair.c
> > > @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > * Let's add the task's estimated utilization to the cfs_rq's
> > > * estimated utilization, before we update schedutil.
> > > */
> > > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> > > + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> >
> > commit message doesn't explain why you change util_est condition
>
> Because, the sched_delayed flag is set when dequeue_entity, and clear
> after the condition,
> so for migrating/prio_change, we could just check the sched_delayed.
Why is testing sched_delayed enough for migrating/prio_change ?
With your change, we will remove then add back util_est when changing
prio of the task which is useless
> And for the wakeup, because the the sched_delayed flag is cleared after this,
> so use the ENQUEUE_DELAYED flag to ensure the util_est could enqueue.
>
> >
> > > util_est_enqueue(&rq->cfs, p);
> > >
> > > if (flags & ENQUEUE_DELAYED) {
> > > @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> > > */
> > > static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > {
> > > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> > > + if (!p->se.sched_delayed)
> >
> > same here, you should explain in commit message why it's okay to do so
>
> Same as above, the sched_delayed flag is set when dequeue_entity, so
> this place,
> the sched_delayed was not set when sleeping, If the flag is set, it
> indicates that it
> was migrating or prio changing.
>
> By the way, I will kindly update these reasons in the commit message.
>
> Thanks!
>
> BR
> ---
> xuewen
On Wed, Apr 16, 2025 at 5:42 PM Vincent Guittot
<vincent.guittot@linaro.org> wrote:
>
> On Wed, 16 Apr 2025 at 04:55, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
> >
> > On Wed, Apr 16, 2025 at 1:05 AM Vincent Guittot
> > <vincent.guittot@linaro.org> wrote:
> > >
> > > On Tue, 25 Mar 2025 at 02:48, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
> > > >
> > > > When task's uclamp is set, we hope that the CPU frequency
> > > > can increase as quickly as possible when the task is enqueued.
> > > > Because the cpu frequency updating happens during the enqueue_task(),
> > >
> > > Strictly speaking, it doesn't happen during enqueue_task but when :
> > > - attach/detach tasks when migrating
> > > - update_load_avg decayed
> > > - io_wait
> > >
> > > This often happens during an enqueue but not always ...
> >
> > Okay, I would make some adjustments to these descriptions.
> >
> > >
> > > > so the rq's uclamp needs to be updated before the task is enqueued,
> > >
> > > this doesn't ensure that new rq's uclamp will be taken into account
> >
> > Did I miss something?
> >
> > As following stack:
> > enqueue_task_fair()
> > update_load_avg()
> > cfs_rq_util_change(cfs_rq, 0);
>
> As mentioned above, this doesn't always happen so you are not ensured
> to take uclamp into account. If you mandate to take uclamp value into
> account immediately this is not enough
I understand your point now. I think what you're referring to is a
different issue, just like what we discussed earlier with Prateek:
https://lore.kernel.org/all/CAB8ipk_1=U_HgVQrfQ4VRUDrcHJBQd2LJ9aXh8PG6E-Z4_xS+g@mail.gmail.com/
However, I think the purpose of this patch is to ensure that during
the enqueue_task process, if a frequency change is triggered, the
uclamp has already been updated before the frequency is changed.
>
> > cpufreq_update_util()
> > sugov_update_shared()
> > sugov_next_freq_shared()
> > sugov_get_util()
> > effective_cpu_util()
> > *min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN));
> > *max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX));
> >
> > So, the rq's uclamp value should update before enqueue_task().
> > >
> > > > just like util_est.
> > >
> > > just like util_est
> > >
> > > > So, aline the uclamp and util_est and call before freq update.
> > >
> > > nit s/aline/align/ ?
> > align.
> > >
> > > >
> > > > For sched-delayed tasks, the rq uclamp/util_est should only be updated
> > > > when they are enqueued upon being awakened.
> > > > So simply the logic of util_est's enqueue/dequeue check.
> > > >
> > > > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> > > > ---
> > > > v2:
> > > > - simply the util-est's en/dequeue check;
> > > > ---
> > > > Previous discussion:
> > > > https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> > > > https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> > > > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> > > > https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> > > > ---
> > > > kernel/sched/core.c | 17 ++++++++++-------
> > > > kernel/sched/fair.c | 4 ++--
> > > > 2 files changed, 12 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > > index 042351c7afce..72fbe2031e54 100644
> > > > --- a/kernel/sched/core.c
> > > > +++ b/kernel/sched/core.c
> > > > @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> > > > }
> > > > }
> > > >
> > > > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > > > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
> > > > {
> > > > enum uclamp_id clamp_id;
> > > >
> > > > @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > > > if (unlikely(!p->sched_class->uclamp_enabled))
> > > > return;
> > > >
> > > > - if (p->se.sched_delayed)
> > > > + /* Only inc the delayed task which being woken up. */
> > > > + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> > > > return;
> > > >
> > > > for_each_clamp_id(clamp_id)
> > > > @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> > > > }
> > > >
> > > > #else /* !CONFIG_UCLAMP_TASK */
> > > > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> > > > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
> > > > static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> > > > static inline void uclamp_fork(struct task_struct *p) { }
> > > > static inline void uclamp_post_fork(struct task_struct *p) { }
> > > > @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> > > > if (!(flags & ENQUEUE_NOCLOCK))
> > > > update_rq_clock(rq);
> > > >
> > > > - p->sched_class->enqueue_task(rq, p, flags);
> > > > /*
> > > > - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> > > > - * ->sched_delayed.
> > > > + * Can be before ->enqueue_task() because uclamp considers the
> > > > + * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
> > > > + * in ->enqueue_task().
> > > > */
> > > > - uclamp_rq_inc(rq, p);
> > > > + uclamp_rq_inc(rq, p, flags);
> > > > +
> > > > + p->sched_class->enqueue_task(rq, p, flags);
> > > >
> > > > psi_enqueue(p, flags);
> > > >
> > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > > > index c798d2795243..c92fee07fb7b 100644
> > > > --- a/kernel/sched/fair.c
> > > > +++ b/kernel/sched/fair.c
> > > > @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > > * Let's add the task's estimated utilization to the cfs_rq's
> > > > * estimated utilization, before we update schedutil.
> > > > */
> > > > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> > > > + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> > >
> > > commit message doesn't explain why you change util_est condition
> >
> > Because, the sched_delayed flag is set when dequeue_entity, and clear
> > after the condition,
> > so for migrating/prio_change, we could just check the sched_delayed.
>
> Why is testing sched_delayed enough for migrating/prio_change ?
> With your change, we will remove then add back util_est when changing
> prio of the task which is useless
I sincerely apologize for any misunderstanding my previous description
may have caused.
When changing prio without changing class, the delayed_task's
sched_delayed flag is not changed,
we would not remove then add back util_est.
If the class was changed:
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED |
DEQUEUE_NOCLOCK);
It will dequeue the delayed-task first, and will not enqueue it.
As for normal tasks which are not delayed, indeed, the issue you
mentioned can occur, but it seems that this problem has always
existed. Perhaps this is a new issue that has come up.
Thanks!
---
xuewen.yan
>
>
> > And for the wakeup, because the the sched_delayed flag is cleared after this,
> > so use the ENQUEUE_DELAYED flag to ensure the util_est could enqueue.
> >
> > >
> > > > util_est_enqueue(&rq->cfs, p);
> > > >
> > > > if (flags & ENQUEUE_DELAYED) {
> > > > @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> > > > */
> > > > static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > > {
> > > > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> > > > + if (!p->se.sched_delayed)
> > >
> > > same here, you should explain in commit message why it's okay to do so
> >
> > Same as above, the sched_delayed flag is set when dequeue_entity, so
> > this place,
> > the sched_delayed was not set when sleeping, If the flag is set, it
> > indicates that it
> > was migrating or prio changing.
> >
> > By the way, I will kindly update these reasons in the commit message.
> >
> > Thanks!
> >
> > BR
> > ---
> > xuewen
On Wed, 16 Apr 2025 at 13:07, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
>
> On Wed, Apr 16, 2025 at 5:42 PM Vincent Guittot
> <vincent.guittot@linaro.org> wrote:
> >
> > On Wed, 16 Apr 2025 at 04:55, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
> > >
> > > On Wed, Apr 16, 2025 at 1:05 AM Vincent Guittot
> > > <vincent.guittot@linaro.org> wrote:
> > > >
> > > > On Tue, 25 Mar 2025 at 02:48, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
> > > > >
> > > > > When task's uclamp is set, we hope that the CPU frequency
> > > > > can increase as quickly as possible when the task is enqueued.
> > > > > Because the cpu frequency updating happens during the enqueue_task(),
> > > >
> > > > Strictly speaking, it doesn't happen during enqueue_task but when :
> > > > - attach/detach tasks when migrating
> > > > - update_load_avg decayed
> > > > - io_wait
> > > >
> > > > This often happens during an enqueue but not always ...
> > >
> > > Okay, I would make some adjustments to these descriptions.
> > >
> > > >
> > > > > so the rq's uclamp needs to be updated before the task is enqueued,
> > > >
> > > > this doesn't ensure that new rq's uclamp will be taken into account
> > >
> > > Did I miss something?
> > >
> > > As following stack:
> > > enqueue_task_fair()
> > > update_load_avg()
> > > cfs_rq_util_change(cfs_rq, 0);
> >
> > As mentioned above, this doesn't always happen so you are not ensured
> > to take uclamp into account. If you mandate to take uclamp value into
> > account immediately this is not enough
>
> I understand your point now. I think what you're referring to is a
> different issue, just like what we discussed earlier with Prateek:
> https://lore.kernel.org/all/CAB8ipk_1=U_HgVQrfQ4VRUDrcHJBQd2LJ9aXh8PG6E-Z4_xS+g@mail.gmail.com/
>
> However, I think the purpose of this patch is to ensure that during
> the enqueue_task process, if a frequency change is triggered, the
> uclamp has already been updated before the frequency is changed.
okay, so please update the commit message because " When task's uclamp
is set, we hope that the CPU frequency
can increase as quickly as possible when the task is enqueued." is confusing
>
> >
> > > cpufreq_update_util()
> > > sugov_update_shared()
> > > sugov_next_freq_shared()
> > > sugov_get_util()
> > > effective_cpu_util()
> > > *min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN));
> > > *max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX));
> > >
> > > So, the rq's uclamp value should update before enqueue_task().
> > > >
> > > > > just like util_est.
> > > >
> > > > just like util_est
> > > >
> > > > > So, aline the uclamp and util_est and call before freq update.
> > > >
> > > > nit s/aline/align/ ?
> > > align.
> > > >
> > > > >
> > > > > For sched-delayed tasks, the rq uclamp/util_est should only be updated
> > > > > when they are enqueued upon being awakened.
> > > > > So simply the logic of util_est's enqueue/dequeue check.
> > > > >
> > > > > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> > > > > ---
> > > > > v2:
> > > > > - simply the util-est's en/dequeue check;
> > > > > ---
> > > > > Previous discussion:
> > > > > https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> > > > > https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> > > > > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> > > > > https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> > > > > ---
> > > > > kernel/sched/core.c | 17 ++++++++++-------
> > > > > kernel/sched/fair.c | 4 ++--
> > > > > 2 files changed, 12 insertions(+), 9 deletions(-)
> > > > >
> > > > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > > > index 042351c7afce..72fbe2031e54 100644
> > > > > --- a/kernel/sched/core.c
> > > > > +++ b/kernel/sched/core.c
> > > > > @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> > > > > }
> > > > > }
> > > > >
> > > > > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > > > > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
> > > > > {
> > > > > enum uclamp_id clamp_id;
> > > > >
> > > > > @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > > > > if (unlikely(!p->sched_class->uclamp_enabled))
> > > > > return;
> > > > >
> > > > > - if (p->se.sched_delayed)
> > > > > + /* Only inc the delayed task which being woken up. */
> > > > > + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> > > > > return;
> > > > >
> > > > > for_each_clamp_id(clamp_id)
> > > > > @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> > > > > }
> > > > >
> > > > > #else /* !CONFIG_UCLAMP_TASK */
> > > > > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> > > > > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
> > > > > static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> > > > > static inline void uclamp_fork(struct task_struct *p) { }
> > > > > static inline void uclamp_post_fork(struct task_struct *p) { }
> > > > > @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> > > > > if (!(flags & ENQUEUE_NOCLOCK))
> > > > > update_rq_clock(rq);
> > > > >
> > > > > - p->sched_class->enqueue_task(rq, p, flags);
> > > > > /*
> > > > > - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> > > > > - * ->sched_delayed.
> > > > > + * Can be before ->enqueue_task() because uclamp considers the
> > > > > + * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
> > > > > + * in ->enqueue_task().
> > > > > */
> > > > > - uclamp_rq_inc(rq, p);
> > > > > + uclamp_rq_inc(rq, p, flags);
> > > > > +
> > > > > + p->sched_class->enqueue_task(rq, p, flags);
> > > > >
> > > > > psi_enqueue(p, flags);
> > > > >
> > > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > > > > index c798d2795243..c92fee07fb7b 100644
> > > > > --- a/kernel/sched/fair.c
> > > > > +++ b/kernel/sched/fair.c
> > > > > @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > > > * Let's add the task's estimated utilization to the cfs_rq's
> > > > > * estimated utilization, before we update schedutil.
> > > > > */
> > > > > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> > > > > + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> > > >
> > > > commit message doesn't explain why you change util_est condition
> > >
> > > Because, the sched_delayed flag is set when dequeue_entity, and clear
> > > after the condition,
> > > so for migrating/prio_change, we could just check the sched_delayed.
> >
> > Why is testing sched_delayed enough for migrating/prio_change ?
> > With your change, we will remove then add back util_est when changing
> > prio of the task which is useless
>
> I sincerely apologize for any misunderstanding my previous description
> may have caused.
> When changing prio without changing class, the delayed_task's
> sched_delayed flag is not changed,
> we would not remove then add back util_est.
> If the class was changed:
>
> if (prev_class != next_class && p->se.sched_delayed)
> dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED |
> DEQUEUE_NOCLOCK);
>
> It will dequeue the delayed-task first, and will not enqueue it.
>
> As for normal tasks which are not delayed, indeed, the issue you
> mentioned can occur, but it seems that this problem has always
> existed. Perhaps this is a new issue that has come up.
I have been confused by the patch that added the condition "if
(!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags &
ENQUEUE_RESTORE))))". I wrongly thought it was for
dequeue_save/enqueue_restore
Could you please split this in 2 patches :
patch 1 updates condition for util_est_dequeue/enqueue and a
description why it's safe
patch 2 for aligning uclamp with util_est
Thanks
>
> Thanks!
>
> ---
> xuewen.yan
>
> >
> >
> > > And for the wakeup, because the the sched_delayed flag is cleared after this,
> > > so use the ENQUEUE_DELAYED flag to ensure the util_est could enqueue.
> > >
> > > >
> > > > > util_est_enqueue(&rq->cfs, p);
> > > > >
> > > > > if (flags & ENQUEUE_DELAYED) {
> > > > > @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> > > > > */
> > > > > static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > > > {
> > > > > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> > > > > + if (!p->se.sched_delayed)
> > > >
> > > > same here, you should explain in commit message why it's okay to do so
> > >
> > > Same as above, the sched_delayed flag is set when dequeue_entity, so
> > > this place,
> > > the sched_delayed was not set when sleeping, If the flag is set, it
> > > indicates that it
> > > was migrating or prio changing.
> > >
> > > By the way, I will kindly update these reasons in the commit message.
> > >
> > > Thanks!
> > >
> > > BR
> > > ---
> > > xuewen
On Wed, Apr 16, 2025 at 8:19 PM Vincent Guittot
<vincent.guittot@linaro.org> wrote:
>
> On Wed, 16 Apr 2025 at 13:07, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
> >
> > On Wed, Apr 16, 2025 at 5:42 PM Vincent Guittot
> > <vincent.guittot@linaro.org> wrote:
> > >
> > > On Wed, 16 Apr 2025 at 04:55, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
> > > >
> > > > On Wed, Apr 16, 2025 at 1:05 AM Vincent Guittot
> > > > <vincent.guittot@linaro.org> wrote:
> > > > >
> > > > > On Tue, 25 Mar 2025 at 02:48, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
> > > > > >
> > > > > > When task's uclamp is set, we hope that the CPU frequency
> > > > > > can increase as quickly as possible when the task is enqueued.
> > > > > > Because the cpu frequency updating happens during the enqueue_task(),
> > > > >
> > > > > Strictly speaking, it doesn't happen during enqueue_task but when :
> > > > > - attach/detach tasks when migrating
> > > > > - update_load_avg decayed
> > > > > - io_wait
> > > > >
> > > > > This often happens during an enqueue but not always ...
> > > >
> > > > Okay, I would make some adjustments to these descriptions.
> > > >
> > > > >
> > > > > > so the rq's uclamp needs to be updated before the task is enqueued,
> > > > >
> > > > > this doesn't ensure that new rq's uclamp will be taken into account
> > > >
> > > > Did I miss something?
> > > >
> > > > As following stack:
> > > > enqueue_task_fair()
> > > > update_load_avg()
> > > > cfs_rq_util_change(cfs_rq, 0);
> > >
> > > As mentioned above, this doesn't always happen so you are not ensured
> > > to take uclamp into account. If you mandate to take uclamp value into
> > > account immediately this is not enough
> >
> > I understand your point now. I think what you're referring to is a
> > different issue, just like what we discussed earlier with Prateek:
> > https://lore.kernel.org/all/CAB8ipk_1=U_HgVQrfQ4VRUDrcHJBQd2LJ9aXh8PG6E-Z4_xS+g@mail.gmail.com/
> >
> > However, I think the purpose of this patch is to ensure that during
> > the enqueue_task process, if a frequency change is triggered, the
> > uclamp has already been updated before the frequency is changed.
>
> okay, so please update the commit message because " When task's uclamp
> is set, we hope that the CPU frequency
> can increase as quickly as possible when the task is enqueued." is confusing
Okay.
>
> >
> > >
> > > > cpufreq_update_util()
> > > > sugov_update_shared()
> > > > sugov_next_freq_shared()
> > > > sugov_get_util()
> > > > effective_cpu_util()
> > > > *min = max(irq + cpu_bw_dl(rq), uclamp_rq_get(rq, UCLAMP_MIN));
> > > > *max = min(scale, uclamp_rq_get(rq, UCLAMP_MAX));
> > > >
> > > > So, the rq's uclamp value should update before enqueue_task().
> > > > >
> > > > > > just like util_est.
> > > > >
> > > > > just like util_est
> > > > >
> > > > > > So, aline the uclamp and util_est and call before freq update.
> > > > >
> > > > > nit s/aline/align/ ?
> > > > align.
> > > > >
> > > > > >
> > > > > > For sched-delayed tasks, the rq uclamp/util_est should only be updated
> > > > > > when they are enqueued upon being awakened.
> > > > > > So simply the logic of util_est's enqueue/dequeue check.
> > > > > >
> > > > > > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> > > > > > ---
> > > > > > v2:
> > > > > > - simply the util-est's en/dequeue check;
> > > > > > ---
> > > > > > Previous discussion:
> > > > > > https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> > > > > > https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> > > > > > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> > > > > > https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> > > > > > ---
> > > > > > kernel/sched/core.c | 17 ++++++++++-------
> > > > > > kernel/sched/fair.c | 4 ++--
> > > > > > 2 files changed, 12 insertions(+), 9 deletions(-)
> > > > > >
> > > > > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > > > > index 042351c7afce..72fbe2031e54 100644
> > > > > > --- a/kernel/sched/core.c
> > > > > > +++ b/kernel/sched/core.c
> > > > > > @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> > > > > > }
> > > > > > }
> > > > > >
> > > > > > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > > > > > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
> > > > > > {
> > > > > > enum uclamp_id clamp_id;
> > > > > >
> > > > > > @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > > > > > if (unlikely(!p->sched_class->uclamp_enabled))
> > > > > > return;
> > > > > >
> > > > > > - if (p->se.sched_delayed)
> > > > > > + /* Only inc the delayed task which being woken up. */
> > > > > > + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> > > > > > return;
> > > > > >
> > > > > > for_each_clamp_id(clamp_id)
> > > > > > @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> > > > > > }
> > > > > >
> > > > > > #else /* !CONFIG_UCLAMP_TASK */
> > > > > > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> > > > > > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
> > > > > > static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> > > > > > static inline void uclamp_fork(struct task_struct *p) { }
> > > > > > static inline void uclamp_post_fork(struct task_struct *p) { }
> > > > > > @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> > > > > > if (!(flags & ENQUEUE_NOCLOCK))
> > > > > > update_rq_clock(rq);
> > > > > >
> > > > > > - p->sched_class->enqueue_task(rq, p, flags);
> > > > > > /*
> > > > > > - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> > > > > > - * ->sched_delayed.
> > > > > > + * Can be before ->enqueue_task() because uclamp considers the
> > > > > > + * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
> > > > > > + * in ->enqueue_task().
> > > > > > */
> > > > > > - uclamp_rq_inc(rq, p);
> > > > > > + uclamp_rq_inc(rq, p, flags);
> > > > > > +
> > > > > > + p->sched_class->enqueue_task(rq, p, flags);
> > > > > >
> > > > > > psi_enqueue(p, flags);
> > > > > >
> > > > > > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > > > > > index c798d2795243..c92fee07fb7b 100644
> > > > > > --- a/kernel/sched/fair.c
> > > > > > +++ b/kernel/sched/fair.c
> > > > > > @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > > > > * Let's add the task's estimated utilization to the cfs_rq's
> > > > > > * estimated utilization, before we update schedutil.
> > > > > > */
> > > > > > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> > > > > > + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> > > > >
> > > > > commit message doesn't explain why you change util_est condition
> > > >
> > > > Because, the sched_delayed flag is set when dequeue_entity, and clear
> > > > after the condition,
> > > > so for migrating/prio_change, we could just check the sched_delayed.
> > >
> > > Why is testing sched_delayed enough for migrating/prio_change ?
> > > With your change, we will remove then add back util_est when changing
> > > prio of the task which is useless
> >
> > I sincerely apologize for any misunderstanding my previous description
> > may have caused.
> > When changing prio without changing class, the delayed_task's
> > sched_delayed flag is not changed,
> > we would not remove then add back util_est.
> > If the class was changed:
> >
> > if (prev_class != next_class && p->se.sched_delayed)
> > dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED |
> > DEQUEUE_NOCLOCK);
> >
> > It will dequeue the delayed-task first, and will not enqueue it.
> >
> > As for normal tasks which are not delayed, indeed, the issue you
> > mentioned can occur, but it seems that this problem has always
> > existed. Perhaps this is a new issue that has come up.
>
> I have been confused by the patch that added the condition "if
> (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags &
> ENQUEUE_RESTORE))))". I wrongly thought it was for
> dequeue_save/enqueue_restore
>
> Could you please split this in 2 patches :
> patch 1 updates condition for util_est_dequeue/enqueue and a
> description why it's safe
> patch 2 for aligning uclamp with util_est
>
Alright, I will make the changes in patch-v3 as well.
Thanks!
> Thanks
>
> >
> > Thanks!
> >
> > ---
> > xuewen.yan
> >
> > >
> > >
> > > > And for the wakeup, because the the sched_delayed flag is cleared after this,
> > > > so use the ENQUEUE_DELAYED flag to ensure the util_est could enqueue.
> > > >
> > > > >
> > > > > > util_est_enqueue(&rq->cfs, p);
> > > > > >
> > > > > > if (flags & ENQUEUE_DELAYED) {
> > > > > > @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> > > > > > */
> > > > > > static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > > > > > {
> > > > > > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> > > > > > + if (!p->se.sched_delayed)
> > > > >
> > > > > same here, you should explain in commit message why it's okay to do so
> > > >
> > > > Same as above, the sched_delayed flag is set when dequeue_entity, so
> > > > this place,
> > > > the sched_delayed was not set when sleeping, If the flag is set, it
> > > > indicates that it
> > > > was migrating or prio changing.
> > > >
> > > > By the way, I will kindly update these reasons in the commit message.
> > > >
> > > > Thanks!
> > > >
> > > > BR
> > > > ---
> > > > xuewen
On 16/04/2025 14:19, Vincent Guittot wrote:
> On Wed, 16 Apr 2025 at 13:07, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
>>
>> On Wed, Apr 16, 2025 at 5:42 PM Vincent Guittot
>> <vincent.guittot@linaro.org> wrote:
>>>
>>> On Wed, 16 Apr 2025 at 04:55, Xuewen Yan <xuewen.yan94@gmail.com> wrote:
>>>>
>>>> On Wed, Apr 16, 2025 at 1:05 AM Vincent Guittot
>>>> <vincent.guittot@linaro.org> wrote:
>>>>>
>>>>> On Tue, 25 Mar 2025 at 02:48, Xuewen Yan <xuewen.yan@unisoc.com> wrote:
[...]
>>> Why is testing sched_delayed enough for migrating/prio_change ?
>>> With your change, we will remove then add back util_est when changing
>>> prio of the task which is useless
>>
>> I sincerely apologize for any misunderstanding my previous description
>> may have caused.
>> When changing prio without changing class, the delayed_task's
>> sched_delayed flag is not changed,
>> we would not remove then add back util_est.
>> If the class was changed:
>>
>> if (prev_class != next_class && p->se.sched_delayed)
>> dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED |
>> DEQUEUE_NOCLOCK);
>>
>> It will dequeue the delayed-task first, and will not enqueue it.
>>
>> As for normal tasks which are not delayed, indeed, the issue you
>> mentioned can occur, but it seems that this problem has always
>> existed. Perhaps this is a new issue that has come up.
>
> I have been confused by the patch that added the condition "if
> (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags &
> ENQUEUE_RESTORE))))". I wrongly thought it was for
> dequeue_save/enqueue_restore
No, this was just for sched_delayed. I convinced myself that the
logic stays the same with the following tests:
-->8--
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eb5a2572b4f8..65692938696f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6930,6 +6930,19 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
int rq_h_nr_queued = rq->cfs.h_nr_queued;
u64 slice = 0;
+ bool D = !!p->se.sched_delayed;
+ bool M = task_on_rq_migrating(p);
+ bool Er = !!(flags & ENQUEUE_RESTORE);
+ bool Ed = !!(flags & ENQUEUE_DELAYED);
+
+ /* won't be called */
+ BUG_ON(D && M && Er); // [1]
+ BUG_ON(!D && M && Er); // [2]
+
+ BUG_ON(D && ((M || Er) == Ed)); // [3]
+ BUG_ON(!(D && (M || Er)) != (!D || Ed)); // [4]
+ BUG_ON(!(D && (M || Er)) != (!(D && !Ed)));
+
/*
* The code below (indirectly) updates schedutil which looks at
* the cfs_rq utilization to select a frequency.
@@ -7178,6 +7191,17 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
*/
static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
{
+ bool D = !!p->se.sched_delayed;
+ bool M = task_on_rq_migrating(p);
+ bool Ds = !!(flags & DEQUEUE_SAVE);
+
+ /* won't be called */
+ BUG_ON(D && M && Ds); // [5]
+ BUG_ON(!D && M && Ds); // [6]
+ BUG_ON(D && !M && !Ds); // [7]
+
+ BUG_ON(!(D && (M || Ds)) != !D); // [8]
+
if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
util_est_dequeue(&rq->cfs, p);
-->8--
In enqueue, when D is true, M or Er is never set with Ed. [3], [4].
In dequeue, since [7] is never true, [8] is never true as well.
> Could you please split this in 2 patches :
> patch 1 updates condition for util_est_dequeue/enqueue and a
> description why it's safe
> patch 2 for aligning uclamp with util_est
+1
[...]
On 25/03/2025 02:47, Xuewen Yan wrote: > When task's uclamp is set, we hope that the CPU frequency > can increase as quickly as possible when the task is enqueued. > Because the cpu frequency updating happens during the enqueue_task(), > so the rq's uclamp needs to be updated before the task is enqueued, > just like util_est. > So, aline the uclamp and util_est and call before freq update. > > For sched-delayed tasks, the rq uclamp/util_est should only be updated > when they are enqueued upon being awakened. > So simply the logic of util_est's enqueue/dequeue check. > > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com> > --- > v2: > - simply the util-est's en/dequeue check; > --- > Previous discussion: > https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/ > https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u > https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/ LGTM. Reviewed-by: Dietmar Eggemann <dietmar.eggemann@arm.com>
Hello Xuewen,
On 3/25/2025 7:17 AM, Xuewen Yan wrote:
> When task's uclamp is set, we hope that the CPU frequency
> can increase as quickly as possible when the task is enqueued.
> Because the cpu frequency updating happens during the enqueue_task(),
> so the rq's uclamp needs to be updated before the task is enqueued,
> just like util_est.
> So, aline the uclamp and util_est and call before freq update.
>
> For sched-delayed tasks, the rq uclamp/util_est should only be updated
> when they are enqueued upon being awakened.
> So simply the logic of util_est's enqueue/dequeue check.
>
> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> ---
> v2:
> - simply the util-est's en/dequeue check;
> ---
> Previous discussion:
> https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> ---
> kernel/sched/core.c | 17 ++++++++++-------
> kernel/sched/fair.c | 4 ++--
> 2 files changed, 12 insertions(+), 9 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 042351c7afce..72fbe2031e54 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> }
> }
>
> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
> {
> enum uclamp_id clamp_id;
>
> @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> if (unlikely(!p->sched_class->uclamp_enabled))
> return;
>
> - if (p->se.sched_delayed)
> + /* Only inc the delayed task which being woken up. */
> + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> return;
>
> for_each_clamp_id(clamp_id)
> @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> }
>
> #else /* !CONFIG_UCLAMP_TASK */
> -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
> static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> static inline void uclamp_fork(struct task_struct *p) { }
> static inline void uclamp_post_fork(struct task_struct *p) { }
> @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> if (!(flags & ENQUEUE_NOCLOCK))
> update_rq_clock(rq);
>
> - p->sched_class->enqueue_task(rq, p, flags);
> /*
> - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> - * ->sched_delayed.
> + * Can be before ->enqueue_task() because uclamp considers the
> + * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
> + * in ->enqueue_task().
> */
> - uclamp_rq_inc(rq, p);
> + uclamp_rq_inc(rq, p, flags);
> +
> + p->sched_class->enqueue_task(rq, p, flags);
>
> psi_enqueue(p, flags);
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c798d2795243..c92fee07fb7b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> * Let's add the task's estimated utilization to the cfs_rq's
> * estimated utilization, before we update schedutil.
> */
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> util_est_enqueue(&rq->cfs, p);
Wouldn't this do a util_est_{dequeue,enqueue}() for a save/restore
operation of a non-delayed task too? Is that desired?
On a larger note ...
An enqueue of a delayed task will call requeue_delayed_entity() which
will only enqueue p->se on its cfs_rq and do an update_load_avg() for
that cfs_rq alone.
With cgroups enabled, this cfs_rq might not be the root cfs_rq and
cfs_rq_util_change() will not call cpufreq_update_util() leaving the
CPU running at the older frequency despite the updated uclamp
constraints.
I think cfs_rq_util_change() should be called for the root cfs_rq
when a task is delayed or when it is re-enqueued, to re-evaluate
the uclamp constraints.
Please let me know if I missed something.
>
> if (flags & ENQUEUE_DELAYED) {
> @@ -7168,7 +7168,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
> */
> static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> {
> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
> + if (!p->se.sched_delayed)
> util_est_dequeue(&rq->cfs, p);
>
> util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
--
Thanks and Regards,
Prateek
Hi Prateek,
On Wed, Mar 26, 2025 at 12:54 AM K Prateek Nayak <kprateek.nayak@amd.com> wrote:
>
> Hello Xuewen,
>
> On 3/25/2025 7:17 AM, Xuewen Yan wrote:
> > When task's uclamp is set, we hope that the CPU frequency
> > can increase as quickly as possible when the task is enqueued.
> > Because the cpu frequency updating happens during the enqueue_task(),
> > so the rq's uclamp needs to be updated before the task is enqueued,
> > just like util_est.
> > So, aline the uclamp and util_est and call before freq update.
> >
> > For sched-delayed tasks, the rq uclamp/util_est should only be updated
> > when they are enqueued upon being awakened.
> > So simply the logic of util_est's enqueue/dequeue check.
> >
> > Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
> > ---
> > v2:
> > - simply the util-est's en/dequeue check;
> > ---
> > Previous discussion:
> > https://lore.kernel.org/all/CAB8ipk8pEvOtCm-d0o1rsekwxPWUHk9iBGtt9TLTWW-iWTQKiA@mail.gmail.com/
> > https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#u
> > https://lore.kernel.org/all/CAB8ipk9LpbiUDnbcV6+59+Sa=Ai7tFzO===mpLD3obNdV4=J-A@mail.gmail.com/T/#u
> > https://lore.kernel.org/all/aa8baf67-a8ec-4ad8-a6a8-afdcd7036771@arm.com/
> > ---
> > kernel/sched/core.c | 17 ++++++++++-------
> > kernel/sched/fair.c | 4 ++--
> > 2 files changed, 12 insertions(+), 9 deletions(-)
> >
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index 042351c7afce..72fbe2031e54 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -1747,7 +1747,7 @@ static inline void uclamp_rq_dec_id(struct rq *rq, struct task_struct *p,
> > }
> > }
> >
> > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags)
> > {
> > enum uclamp_id clamp_id;
> >
> > @@ -1763,7 +1763,8 @@ static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p)
> > if (unlikely(!p->sched_class->uclamp_enabled))
> > return;
> >
> > - if (p->se.sched_delayed)
> > + /* Only inc the delayed task which being woken up. */
> > + if (p->se.sched_delayed && !(flags & ENQUEUE_DELAYED))
> > return;
> >
> > for_each_clamp_id(clamp_id)
> > @@ -2031,7 +2032,7 @@ static void __init init_uclamp(void)
> > }
> >
> > #else /* !CONFIG_UCLAMP_TASK */
> > -static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p) { }
> > +static inline void uclamp_rq_inc(struct rq *rq, struct task_struct *p, int flags) { }
> > static inline void uclamp_rq_dec(struct rq *rq, struct task_struct *p) { }
> > static inline void uclamp_fork(struct task_struct *p) { }
> > static inline void uclamp_post_fork(struct task_struct *p) { }
> > @@ -2067,12 +2068,14 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
> > if (!(flags & ENQUEUE_NOCLOCK))
> > update_rq_clock(rq);
> >
> > - p->sched_class->enqueue_task(rq, p, flags);
> > /*
> > - * Must be after ->enqueue_task() because ENQUEUE_DELAYED can clear
> > - * ->sched_delayed.
> > + * Can be before ->enqueue_task() because uclamp considers the
> > + * ENQUEUE_DELAYED task before its ->sched_delayed gets cleared
> > + * in ->enqueue_task().
> > */
> > - uclamp_rq_inc(rq, p);
> > + uclamp_rq_inc(rq, p, flags);
> > +
> > + p->sched_class->enqueue_task(rq, p, flags);
> >
> > psi_enqueue(p, flags);
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index c798d2795243..c92fee07fb7b 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
> > * Let's add the task's estimated utilization to the cfs_rq's
> > * estimated utilization, before we update schedutil.
> > */
> > - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
> > + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
> > util_est_enqueue(&rq->cfs, p);
>
> Wouldn't this do a util_est_{dequeue,enqueue}() for a save restore
> operation too of a non-delayed task? Is that desired?
For a delayed task, its util_est should be dequeued/enqueued only when it
sleeps and wakes up,
For the save restore operation, there is no need to enqueue it,
because it is not woken up.
So the condition of enqueue actually is:
if (!p->se.sched_delayed || (p->se.sched_delayed && (flags & ENQUEUE_DELAYED)))
And, this is equal to :
if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
More details here:
https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#ma2505e90489316eb354390b42dee9d053f6fd1e9
>
> On a larger note ...
>
> An enqueue of a delayed task will call requeue_delayed_entity() which
> will only enqueue p->se on its cfs_rq and do an update_load_avg() for
> that cfs_rq alone.
>
> With cgroups enabled, this cfs_rq might not be the root cfs_rq and
> cfs_rq_util_change() will not call cpufreq_update_util() leaving the
> CPU running at the older frequency despite the updated uclamp
> constraints.
>
> I think cfs_rq_util_change() should be called for the root cfs_rq
> when a task is delayed or when it is re-enqueued to re-evaluate
> the uclamp constraints.
I think you're referring to a different issue with the delayed-task's
util_est/uclamp.
This issue is unrelated to util-est and uclamp, because even without
these two features, the problem you're mentioning still exists.
Specifically, if the delayed-task is not the root CFS task, the CPU
frequency might not be updated in time when the delayed-task is
enqueued.
Maybe we could add the update_load_avg() in clear_delayed to solve the issue?
-->8--
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a0c4cd26ee07..c75d50dab86b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5435,6 +5435,7 @@ static void clear_delayed(struct sched_entity *se)
for_each_sched_entity(se) {
struct cfs_rq *cfs_rq = cfs_rq_of(se);
+ update_load_avg(cfs_rq, se, UPDATE_TG);
cfs_rq->h_nr_runnable++;
if (cfs_rq_throttled(cfs_rq))
break;
---
BR
xuewen
Hello Xuewen,
On 3/26/2025 8:27 AM, Xuewen Yan wrote:
> Hi Prateek,
>
> On Wed, Mar 26, 2025 at 12:54 AM K Prateek Nayak <kprateek.nayak@amd.com> wrote:
>>
>> Hello Xuewen,
>>
>> On 3/25/2025 7:17 AM, Xuewen Yan wrote:
>>> When task's uclamp is set, we hope that the CPU frequency
>>> can increase as quickly as possible when the task is enqueued.
>>> Because the cpu frequency updating happens during the enqueue_task(),
>>> so the rq's uclamp needs to be updated before the task is enqueued,
>>> just like util_est.
I thought the frequency ramp up / ramp down was a problem with
delayed tasks being requeued.
>>> So, align the uclamp and util_est and call before freq update.
>>>
>>> For sched-delayed tasks, the rq uclamp/util_est should only be updated
>>> when they are enqueued upon being awakened.
>>> So simplify the logic of util_est's enqueue/dequeue check.
>>>
>>> Signed-off-by: Xuewen Yan <xuewen.yan@unisoc.com>
[..snip..]
>>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>>> index c798d2795243..c92fee07fb7b 100644
>>> --- a/kernel/sched/fair.c
>>> +++ b/kernel/sched/fair.c
>>> @@ -6930,7 +6930,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
>>> * Let's add the task's estimated utilization to the cfs_rq's
>>> * estimated utilization, before we update schedutil.
>>> */
>>> - if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & ENQUEUE_RESTORE))))
>>> + if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
>>> util_est_enqueue(&rq->cfs, p);
>>
>> Wouldn't this do a util_est_{dequeue,enqueue}() for a save restore
>> operation too of a non-delayed task? Is that desired?
>
> For delayed-task, its util_est should dequeue/enqueue only for its
> sleeping and waking up,
> For the save restore operation, there is no need to enqueue it,
> because it is not woken up.
> So the condition of enqueue actually is:
> if (!p->se.sched_delayed || (p->se.sched_delayed && (flags & ENQUEUE_DELAYED)))
> And, this is equal to :
> if (!p->se.sched_delayed || (flags & ENQUEUE_DELAYED))
>
> More details here:
> https://lore.kernel.org/all/84441660bef0a5e67fd09dc3787178d0276dad31.1740664400.git.hongyan.xia2@arm.com/T/#ma2505e90489316eb354390b42dee9d053f6fd1e9
>
Ah! Correct! I got my "&&"s and "||"s confused. Sorry about that.
>>
>> On a larger note ...
>>
>> An enqueue of a delayed task will call requeue_delayed_entity() which
>> will only enqueue p->se on its cfs_rq and do an update_load_avg() for
>> that cfs_rq alone.
>>
>> With cgroups enabled, this cfs_rq might not be the root cfs_rq and
>> cfs_rq_util_change() will not call cpufreq_update_util() leaving the
>> CPU running at the older frequency despite the updated uclamp
>> constraints.
>>
> >> I think cfs_rq_util_change() should be called for the root cfs_rq
>> when a task is delayed or when it is re-enqueued to re-evaluate
>> the uclamp constraints.
>
> I think you're referring to a different issue with the delayed-task's
> util_est/uclamp.
> This issue is unrelated to util-est and uclamp, because even without
> these two features, the problem you're mentioning still exists.
> Specifically, if the delayed-task is not the root CFS task, the CPU
> frequency might not be updated in time when the delayed-task is
> enqueued.
> Maybe we could add the update_load_avg() in clear_delayed to solve the issue?
I thought something like:
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a0c4cd26ee07..007b0bb91529 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5473,6 +5473,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
if (sched_feat(DELAY_DEQUEUE) && delay &&
!entity_eligible(cfs_rq, se)) {
update_load_avg(cfs_rq, se, 0);
+ /* Reevaluate frequency since uclamp may have changed */
+ if (cfs_rq != rq->cfs)
+ cfs_rq_util_change(rq->cfs, 0);
set_delayed(se);
return false;
}
@@ -6916,6 +6919,9 @@ requeue_delayed_entity(struct sched_entity *se)
}
update_load_avg(cfs_rq, se, 0);
+ /* Reevaluate frequency since uclamp may have changed */
+ if (cfs_rq != rq->cfs)
+ cfs_rq_util_change(rq->cfs, 0);
clear_delayed(se);
}
---
to ensure that schedutil knows about any changes in the uclamp
constraints at the first dequeue, at reenqueue.
>
> -->8--
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index a0c4cd26ee07..c75d50dab86b 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5435,6 +5435,7 @@ static void clear_delayed(struct sched_entity *se)
> for_each_sched_entity(se) {
> struct cfs_rq *cfs_rq = cfs_rq_of(se);
>
> + update_load_avg(cfs_rq, se, UPDATE_TG);
For finish_delayed_dequeue_entity() calling into clear_delayed(),
UPDATE_TG would be done already in dequeue_entity().
For requeue, I believe the motivation to skip UPDATE_TG was for
the entity to compete with its original weight to be picked off
later.
> cfs_rq->h_nr_runnable++;
> if (cfs_rq_throttled(cfs_rq))
> break;
>
> ---
>
> BR
> xuewen
--
Thanks and Regards,
Prateek
Hi Prateek,
On Wed, Mar 26, 2025 at 12:37 PM K Prateek Nayak <kprateek.nayak@amd.com> wrote:
>
> Hello Xuewen,
>
> On 3/26/2025 8:27 AM, Xuewen Yan wrote:
> > Hi Prateek,
> >
> > On Wed, Mar 26, 2025 at 12:54 AM K Prateek Nayak <kprateek.nayak@amd.com> wrote:
> >>
> >> Hello Xuewen,
> >>
> >> On 3/25/2025 7:17 AM, Xuewen Yan wrote:
> >>> When task's uclamp is set, we hope that the CPU frequency
> >>> can increase as quickly as possible when the task is enqueued.
> >>> Because the cpu frequency updating happens during the enqueue_task(),
> >>> so the rq's uclamp needs to be updated before the task is enqueued,
> >>> just like util_est.
>
> I thought the frequency ramp up / ramp down was a problem with
> delayed tasks being requeued.
>
Yes, you are right.
IMHO, perhaps this issue should be fixed separately, as uclamp not
only affects delayed tasks, but should also be placed before
enqueue-task for other tasks.
On the other hand, I previously also sent a message regarding the
frequency issue with delayed tasks in iowait.
https://lore.kernel.org/all/20250226114301.4900-1-xuewen.yan@unisoc.com/
...
> >> On a larger note ...
> >>
> >> An enqueue of a delayed task will call requeue_delayed_entity() which
> >> will only enqueue p->se on its cfs_rq and do an update_load_avg() for
> >> that cfs_rq alone.
> >>
> >> With cgroups enabled, this cfs_rq might not be the root cfs_rq and
> >> cfs_rq_util_change() will not call cpufreq_update_util() leaving the
> >> CPU running at the older frequency despite the updated uclamp
> >> constraints.
> >>
> >> I think cfs_rq_util_change() should be called for the root cfs_rq
> >> when a task is delayed or when it is re-enqueued to re-evaluate
> >> the uclamp constraints.
> >
> > I think you're referring to a different issue with the delayed-task's
> > util_est/uclamp.
> > This issue is unrelated to util-est and uclamp, because even without
> > these two features, the problem you're mentioning still exists.
> > Specifically, if the delayed-task is not the root CFS task, the CPU
> > frequency might not be updated in time when the delayed-task is
> > enqueued.
> > Maybe we could add the update_load_avg() in clear_delayed to solve the issue?
>
> I thought something like:
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index a0c4cd26ee07..007b0bb91529 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -5473,6 +5473,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
> if (sched_feat(DELAY_DEQUEUE) && delay &&
> !entity_eligible(cfs_rq, se)) {
> update_load_avg(cfs_rq, se, 0);
> + /* Reevaluate frequency since uclamp may have changed */
> + if (cfs_rq != rq->cfs)
> + cfs_rq_util_change(rq->cfs, 0);
> set_delayed(se);
> return false;
> }
> @@ -6916,6 +6919,9 @@ requeue_delayed_entity(struct sched_entity *se)
> }
>
> update_load_avg(cfs_rq, se, 0);
> + /* Reevaluate frequency since uclamp may have changed */
> + if (cfs_rq != rq->cfs)
> + cfs_rq_util_change(rq->cfs, 0);
> clear_delayed(se);
> }
>
> ---
>
> to ensure that schedutil knows about any changes in the uclamp
> constraints at the first dequeue, at reenqueue.
Because of the decay of update_load_avg(), for a normal task with
uclamp, it doesn't necessarily trigger frequency update when enqueued.
If we want to enforce frequency scaling for requeued delayed-tasks,
would it be possible to extend this change to trigger frequency update
for all enqueued tasks?
>
> >
> > -->8--
> >
> > diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> > index a0c4cd26ee07..c75d50dab86b 100644
> > --- a/kernel/sched/fair.c
> > +++ b/kernel/sched/fair.c
> > @@ -5435,6 +5435,7 @@ static void clear_delayed(struct sched_entity *se)
> > for_each_sched_entity(se) {
> > struct cfs_rq *cfs_rq = cfs_rq_of(se);
> >
> > + update_load_avg(cfs_rq, se, UPDATE_TG);
>
> For finish_delayed_dequeue_entity() calling into clear_delayed(),
> UPDATE_TG would be done already in dequeue_entity().
>
> For requeue, I believe the motivation to skip UPDATE_TG was for
> the entity to compete with its original weight to be picked off
> later.
Okay.
>
---
BR
xuewen
On 26/03/2025 12:46, Xuewen Yan wrote:
> Hi Prateek,
>
> On Wed, Mar 26, 2025 at 12:37 PM K Prateek Nayak <kprateek.nayak@amd.com> wrote:
>>
>> Hello Xuewen,
>>
>> On 3/26/2025 8:27 AM, Xuewen Yan wrote:
>>> Hi Prateek,
>>>
>>> On Wed, Mar 26, 2025 at 12:54 AM K Prateek Nayak <kprateek.nayak@amd.com> wrote:
>>>>
>>>> Hello Xuewen,
>>>>
>>>> On 3/25/2025 7:17 AM, Xuewen Yan wrote:
[...]
>>>> I think cfs_rq_util_change() should be called for the root cfs_rq
>>>> when a task is delayed or when it is re-enqueued to re-evaluate
>>>> the uclamp constraints.
>>>
>>> I think you're referring to a different issue with the delayed-task's
>>> util_est/uclamp.
>>> This issue is unrelated to util-est and uclamp, because even without
>>> these two features, the problem you're mentioning still exists.
>>> Specifically, if the delayed-task is not the root CFS task, the CPU
>>> frequency might not be updated in time when the delayed-task is
>>> enqueued.
>>> Maybe we could add the update_load_avg() in clear_delayed to solve the issue?
>>
>> I thought something like:
>>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index a0c4cd26ee07..007b0bb91529 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -5473,6 +5473,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
>> if (sched_feat(DELAY_DEQUEUE) && delay &&
>> !entity_eligible(cfs_rq, se)) {
>> update_load_avg(cfs_rq, se, 0);
>> + /* Reevaluate frequency since uclamp may have changed */
>> + if (cfs_rq != rq->cfs)
>> + cfs_rq_util_change(rq->cfs, 0);
>> set_delayed(se);
>> return false;
>> }
>> @@ -6916,6 +6919,9 @@ requeue_delayed_entity(struct sched_entity *se)
>> }
>>
>> update_load_avg(cfs_rq, se, 0);
>> + /* Reevaluate frequency since uclamp may have changed */
>> + if (cfs_rq != rq->cfs)
>> + cfs_rq_util_change(rq->cfs, 0);
>> clear_delayed(se);
>> }
>>
>> ---
>>
>> to ensure that schedutil knows about any changes in the uclamp
>> constraints at the first dequeue, at reenqueue.
>
> Because of the decay of update_load_avg(), for a normal task with
> uclamp, it doesn't necessarily trigger frequency update when enqueued.
> If we want to enforce frequency scaling for requeued delayed-tasks,
> would it be possible to extend this change to trigger frequency update
> for all enqueued tasks?
But IMHO this is not what we want to achieve here? Instead, we want that
the uclamp values of a just enqueued p_1 with:
'p_1->se.sched_delayed && !(flags & ENQUEUE_DELAYED)'
possibly count in CPU frequency settings of p_2 via:
enqueue_entity(..., &p_2->se, ...) -> update_load_avg() ->
if(decayed)cfs_rq_util_change()
e.g. for shared frequency domain:
-> sugov_update_shared() -> sugov_next_freq_shared() -> sugov_get_util()
-> effective_cpu_util(..., &min, &max)
uclamp is about applying the max value of all enqueued tasks.
[...]
© 2016 - 2025 Red Hat, Inc.