[PATCH v2 1/6] sched/eevdf: Fix HRTICK duration

Peter Zijlstra posted 6 patches 2 weeks, 4 days ago
[PATCH v2 1/6] sched/eevdf: Fix HRTICK duration
Posted by Peter Zijlstra 2 weeks, 4 days ago
The nominal duration for an EEVDF task to run is until its deadline,
at which point the deadline is moved ahead and a new task selection is
done.

Try and predict the time 'lost' to higher scheduling classes. Since
this is an estimate, the timer can be either early or late. In case it
is early, task_tick_fair() will take the !need_resched() path and
restart the timer.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 kernel/sched/fair.c |   55 +++++++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5511,7 +5511,7 @@ static void put_prev_entity(struct cfs_r
 }
 
 static void
-entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
+entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
 	/*
 	 * Update run-time statistics of the 'current'.
@@ -5523,17 +5523,6 @@ entity_tick(struct cfs_rq *cfs_rq, struc
 	 */
 	update_load_avg(cfs_rq, curr, UPDATE_TG);
 	update_cfs_group(curr);
-
-#ifdef CONFIG_SCHED_HRTICK
-	/*
-	 * queued ticks are scheduled to match the slice, so don't bother
-	 * validating it and just reschedule.
-	 */
-	if (queued) {
-		resched_curr_lazy(rq_of(cfs_rq));
-		return;
-	}
-#endif
 }
 
 
@@ -6735,21 +6724,39 @@ static inline void sched_fair_update_sto
 static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 {
 	struct sched_entity *se = &p->se;
+	unsigned long scale = 1024;
+	unsigned long util = 0;
+	u64 vdelta;
+	u64 delta;
 
 	WARN_ON_ONCE(task_rq(p) != rq);
 
-	if (rq->cfs.h_nr_queued > 1) {
-		u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
-		u64 slice = se->slice;
-		s64 delta = slice - ran;
-
-		if (delta < 0) {
-			if (task_current_donor(rq, p))
-				resched_curr(rq);
-			return;
-		}
-		hrtick_start(rq, delta);
+	if (rq->cfs.h_nr_queued <= 1)
+		return;
+
+	/*
+	 * Compute time until virtual deadline
+	 */
+	vdelta = se->deadline - se->vruntime;
+	if ((s64)vdelta < 0) {
+		if (task_current_donor(rq, p))
+			resched_curr(rq);
+		return;
+	}
+	delta = (se->load.weight * vdelta) / NICE_0_LOAD;
+
+	/*
+	 * Correct for instantaneous load of other classes.
+	 */
+	util += cpu_util_dl(rq);
+	util += cpu_util_rt(rq);
+	util += cpu_util_irq(rq);
+	if (util && util < 1024) {
+		scale *= 1024;
+		scale /= (1024 - util);
 	}
+
+	hrtick_start(rq, (scale * delta) / 1024);
 }
 
 /*
@@ -13373,7 +13380,7 @@ static void task_tick_fair(struct rq *rq
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
-		entity_tick(cfs_rq, se, queued);
+		entity_tick(cfs_rq, se);
 	}
 
 	if (queued) {
Re: [PATCH v2 1/6] sched/eevdf: Fix HRTICK duration
Posted by Peter Zijlstra 3 days, 15 hours ago
On Wed, Jan 21, 2026 at 05:20:11PM +0100, Peter Zijlstra wrote:

> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -6735,21 +6724,39 @@ static inline void sched_fair_update_sto
>  static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
>  {
>  	struct sched_entity *se = &p->se;
> +	unsigned long scale = 1024;
> +	unsigned long util = 0;
> +	u64 vdelta;
> +	u64 delta;
>  
>  	WARN_ON_ONCE(task_rq(p) != rq);
>  
> +	if (rq->cfs.h_nr_queued <= 1)
> +		return;
> +
> +	/*
> +	 * Compute time until virtual deadline
> +	 */
> +	vdelta = se->deadline - se->vruntime;
> +	if ((s64)vdelta < 0) {
> +		if (task_current_donor(rq, p))
> +			resched_curr(rq);
> +		return;
> +	}
> +	delta = (se->load.weight * vdelta) / NICE_0_LOAD;
> +
> +	/*
> +	 * Correct for instantaneous load of other classes.
> +	 */
> +	util += cpu_util_dl(rq);
> +	util += cpu_util_rt(rq);

Since this is all about current, other scheduling classes are
irrelevant: they cannot run without causing a schedule(), which will
cause the hrtick to be reprogrammed anyway.

So I'm thinking those two lines above ought to go.

> +	util += cpu_util_irq(rq);
> +	if (util && util < 1024) {
> +		scale *= 1024;
> +		scale /= (1024 - util);
>  	}
> +
> +	hrtick_start(rq, (scale * delta) / 1024);
>  }
>  
>  /*

> @@ -5511,7 +5511,7 @@ static void put_prev_entity(struct cfs_r
>  }
>  
>  static void
> -entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
> +entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
>  {
>  	/*
>  	 * Update run-time statistics of the 'current'.
> @@ -5523,17 +5523,6 @@ entity_tick(struct cfs_rq *cfs_rq, struc
>  	 */
>  	update_load_avg(cfs_rq, curr, UPDATE_TG);
>  	update_cfs_group(curr);
> -
> -#ifdef CONFIG_SCHED_HRTICK
> -	/*
> -	 * queued ticks are scheduled to match the slice, so don't bother
> -	 * validating it and just reschedule.
> -	 */
> -	if (queued) {
> -		resched_curr_lazy(rq_of(cfs_rq));
> -		return;
> -	}
> -#endif
>  }
>  
>  
> @@ -13373,7 +13380,7 @@ static void task_tick_fair(struct rq *rq
>  
>  	for_each_sched_entity(se) {
>  		cfs_rq = cfs_rq_of(se);
> -		entity_tick(cfs_rq, se, queued);
> +		entity_tick(cfs_rq, se);
>  	}
>  
>  	if (queued) {
> 

So Thomas did observe some really small hrtimer reprogramming because of
this. If we just miss the normal deadline, it will re-try with a stupid
sliver of time.

Perhaps it makes sense to leave these two hunks out, and simply hard
preempt when the hrtick fires, irrespective of slightly missing the
vruntime due to the approximation on task-clock.
Re: [PATCH v2 1/6] sched/eevdf: Fix HRTICK duration
Posted by Juri Lelli 2 weeks, 3 days ago
Hello,

On 21/01/26 17:20, Peter Zijlstra wrote:
> The nominal duration for an EEVDF task to run is until its deadline.
> At which point the deadline is moved ahead and a new task selection is
> done.
> 
> Try and predict the time 'lost' to higher scheduling classes. Since
> this is an estimate, the timer can be both early or late. In case it
> is early task_tick_fair() will take the !need_resched() path and
> restarts the timer.
> 
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---

...

> @@ -6735,21 +6724,39 @@ static inline void sched_fair_update_sto
>  static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
>  {
>  	struct sched_entity *se = &p->se;
> +	unsigned long scale = 1024;
> +	unsigned long util = 0;
> +	u64 vdelta;
> +	u64 delta;
>  
>  	WARN_ON_ONCE(task_rq(p) != rq);
>  
> -	if (rq->cfs.h_nr_queued > 1) {
> -		u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
> -		u64 slice = se->slice;
> -		s64 delta = slice - ran;
> -
> -		if (delta < 0) {
> -			if (task_current_donor(rq, p))
> -				resched_curr(rq);
> -			return;
> -		}
> -		hrtick_start(rq, delta);
> +	if (rq->cfs.h_nr_queued <= 1)
> +		return;
> +
> +	/*
> +	 * Compute time until virtual deadline
> +	 */
> +	vdelta = se->deadline - se->vruntime;
> +	if ((s64)vdelta < 0) {
> +		if (task_current_donor(rq, p))
> +			resched_curr(rq);
> +		return;
> +	}
> +	delta = (se->load.weight * vdelta) / NICE_0_LOAD;

Nit.. guess we don't fear overflow since vdelta should be bounded
anyway.

Reviewed-by: Juri Lelli <juri.lelli@redhat.com>

Thanks,
Juri