kernel/sched/fair.c | 41 +++++++++++++++++++++++++++-------------- 1 file changed, 27 insertions(+), 14 deletions(-)
The following commit has been merged into the sched/hrtick branch of tip:
Commit-ID: 558c18d3fbb6c5b9c0b42629d7fe34476363ac00
Gitweb: https://git.kernel.org/tip/558c18d3fbb6c5b9c0b42629d7fe34476363ac00
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 24 Feb 2026 17:35:17 +01:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 27 Feb 2026 16:40:03 +01:00
sched/eevdf: Fix HRTICK duration
The nominal duration for an EEVDF task to run is until its deadline, at
which point the deadline is moved ahead and a new task selection is done.
Try and predict the time 'lost' to higher scheduling classes. Since this is
an estimate, the timer can be either early or late. In case it is early,
task_tick_fair() will take the !need_resched() path and restart the timer.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
Link: https://patch.msgid.link/20260224163428.798198874@kernel.org
---
kernel/sched/fair.c | 41 +++++++++++++++++++++++++++--------------
1 file changed, 27 insertions(+), 14 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index eea99ec..247fecd 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6735,21 +6735,37 @@ static inline void sched_fair_update_stop_tick(struct rq *rq, struct task_struct
static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
{
struct sched_entity *se = &p->se;
+ unsigned long scale = 1024;
+ unsigned long util = 0;
+ u64 vdelta;
+ u64 delta;
WARN_ON_ONCE(task_rq(p) != rq);
- if (rq->cfs.h_nr_queued > 1) {
- u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
- u64 slice = se->slice;
- s64 delta = slice - ran;
+ if (rq->cfs.h_nr_queued <= 1)
+ return;
- if (delta < 0) {
- if (task_current_donor(rq, p))
- resched_curr(rq);
- return;
- }
- hrtick_start(rq, delta);
+ /*
+ * Compute time until virtual deadline
+ */
+ vdelta = se->deadline - se->vruntime;
+ if ((s64)vdelta < 0) {
+ if (task_current_donor(rq, p))
+ resched_curr(rq);
+ return;
}
+ delta = (se->load.weight * vdelta) / NICE_0_LOAD;
+
+ /*
+ * Correct for instantaneous load of other classes.
+ */
+ util += cpu_util_irq(rq);
+ if (util && util < 1024) {
+ scale *= 1024;
+ scale /= (1024 - util);
+ }
+
+ hrtick_start(rq, (scale * delta) / 1024);
}
/*
@@ -13365,11 +13381,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
entity_tick(cfs_rq, se, queued);
}
- if (queued) {
- if (!need_resched())
- hrtick_start_fair(rq, curr);
+ if (queued)
return;
- }
if (static_branch_unlikely(&sched_numa_balancing))
task_tick_numa(rq, curr);
Sorry for the very late reply. I was trying to go through this.
On 2/28/26 9:07 PM, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the sched/hrtick branch of tip:
>
> Commit-ID: 558c18d3fbb6c5b9c0b42629d7fe34476363ac00
> Gitweb: https://git.kernel.org/tip/558c18d3fbb6c5b9c0b42629d7fe34476363ac00
> Author: Peter Zijlstra <peterz@infradead.org>
> AuthorDate: Tue, 24 Feb 2026 17:35:17 +01:00
> Committer: Peter Zijlstra <peterz@infradead.org>
> CommitterDate: Fri, 27 Feb 2026 16:40:03 +01:00
>
> sched/eevdf: Fix HRTICK duration
>
> The nominal duration for an EEVDF task to run is until its deadline. At
> which point the deadline is moved ahead and a new task selection is done.
>
> Try and predict the time 'lost' to higher scheduling classes. Since this is
> an estimate, the timer can be both early or late. In case it is early
> task_tick_fair() will take the !need_resched() path and restarts the timer.
>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Signed-off-by: Thomas Gleixner <tglx@kernel.org>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Reviewed-by: Juri Lelli <juri.lelli@redhat.com>
> Link: https://patch.msgid.link/20260224163428.798198874@kernel.org
> ---
> kernel/sched/fair.c | 41 +++++++++++++++++++++++++++--------------
> 1 file changed, 27 insertions(+), 14 deletions(-)
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index eea99ec..247fecd 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6735,21 +6735,37 @@ static inline void sched_fair_update_stop_tick(struct rq *rq, struct task_struct
> static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
> {
> struct sched_entity *se = &p->se;
> + unsigned long scale = 1024;
> + unsigned long util = 0;
> + u64 vdelta;
> + u64 delta;
>
> WARN_ON_ONCE(task_rq(p) != rq);
>
> - if (rq->cfs.h_nr_queued > 1) {
> - u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
> - u64 slice = se->slice;
> - s64 delta = slice - ran;
> + if (rq->cfs.h_nr_queued <= 1)
> + return;
>
> - if (delta < 0) {
> - if (task_current_donor(rq, p))
> - resched_curr(rq);
> - return;
> - }
> - hrtick_start(rq, delta);
> + /*
> + * Compute time until virtual deadline
> + */
> + vdelta = se->deadline - se->vruntime;
> + if ((s64)vdelta < 0) {
> + if (task_current_donor(rq, p))
> + resched_curr(rq);
> + return;
> }
> + delta = (se->load.weight * vdelta) / NICE_0_LOAD;
> +
> + /*
> + * Correct for instantaneous load of other classes.
> + */
> + util += cpu_util_irq(rq);
> + if (util && util < 1024) {
> + scale *= 1024;
> + scale /= (1024 - util);
> + }
The comment/changelog says other classes.
Then why not consider cpu_util_dl and cpu_util_rt too?
Is there a reason why these are not taken into account?
> +
> + hrtick_start(rq, (scale * delta) / 1024);
> }
>
> /*
> @@ -13365,11 +13381,8 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
> entity_tick(cfs_rq, se, queued);
> }
>
> - if (queued) {
> - if (!need_resched())
> - hrtick_start_fair(rq, curr);
> + if (queued)
> return;
> - }
>
> if (static_branch_unlikely(&sched_numa_balancing))
> task_tick_numa(rq, curr);
On Fri, Mar 20, 2026 at 08:29:11PM +0530, Shrikanth Hegde wrote:
> > + /*
> > + * Correct for instantaneous load of other classes.
> > + */
> > + util += cpu_util_irq(rq);
> > + if (util && util < 1024) {
> > + scale *= 1024;
> > + scale /= (1024 - util);
> > + }
>
> Comments/Changelog says other classes.
>
> Then why not consider cpu_util_dl, cpu_util_rt too?
> Is there a reason why these are not taken into calculations?
Damn, forgot to fix that comment.
So yes, it used to correct for those, but then I realized that the
hrtick is strictly for current. So running RT/DL tasks means current is
different.
The only thing that can actually interrupt current and soak up time is
an interrupt.
Does that make sense?
On 3/20/26 9:08 PM, Peter Zijlstra wrote:
> On Fri, Mar 20, 2026 at 08:29:11PM +0530, Shrikanth Hegde wrote:
>
>>> + /*
>>> + * Correct for instantaneous load of other classes.
>>> + */
>>> + util += cpu_util_irq(rq);
>>> + if (util && util < 1024) {
>>> + scale *= 1024;
>>> + scale /= (1024 - util);
>>> + }
>>
>> Comments/Changelog says other classes.
>>
>> Then why not consider cpu_util_dl, cpu_util_rt too?
>> Is there a reason why these are not taken into calculations?
>
> Damn, forgot to fix that comment.
>
> So yes, it used to correct for those, but then I realized that the
> hrtick is strictly for current. So running RT/DL tasks means current is
> different.
>
> The only thing that can actually interrupt current and soak time are
> interrupts.
>
> Does that make sense?
Yes. That helps.
© 2016 - 2026 Red Hat, Inc.