[v3] sched/fair: Manage lag and run to parity with different slices

[PATCH v3 2/6] sched/fair: Fix NO_RUN_TO_PARITY case

Posted by Vincent Guittot 3 months ago

EEVDF expects the scheduler to allocate a time quantum to the selected
entity and then pick a new entity for the next quantum.
Although this notion of time quantum is not strictly doable in our case,
we can ensure a minimum runtime for each task most of the time and pick a
new entity after a minimum time has elapsed.
Reuse the slice protection of run to parity to ensure such runtime
quantum.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 include/linux/sched.h | 10 +++++++++-
 kernel/sched/fair.c   | 30 +++++++++++++++++++-----------
 2 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index eec6b225e9d1..75579f2fb009 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -583,7 +583,15 @@ struct sched_entity {
 	u64				sum_exec_runtime;
 	u64				prev_sum_exec_runtime;
 	u64				vruntime;
-	s64				vlag;
+	union {
+		/*
+		 * When !@on_rq this field is vlag.
+		 * When cfs_rq->curr == se (which implies @on_rq)
+		 * this field is vprot. See protect_slice().
+		 */
+		s64                     vlag;
+		u64                     vprot;
+	};
 	u64				slice;
 
 	u64				nr_migrations;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 43712403ec98..97cf99bb71d6 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -882,23 +882,34 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
 }
 
 /*
- * HACK, stash a copy of deadline at the point of pick in vlag,
- * which isn't used until dequeue.
+ * Set the vruntime up to which an entity can run before looking
+ * for another entity to pick.
+ * In case of run to parity, we protect the entity up to its deadline.
+ * When run to parity is disabled, we give a minimum quantum to the running
+ * entity to ensure progress.
  */
 static inline void set_protect_slice(struct sched_entity *se)
 {
-	se->vlag = se->deadline;
+	u64 quantum = se->slice;
+
+	if (!sched_feat(RUN_TO_PARITY))
+		quantum = min(quantum, normalized_sysctl_sched_base_slice);
+
+	if (quantum != se->slice)
+		se->vprot = min_vruntime(se->deadline, se->vruntime + calc_delta_fair(quantum, se));
+	else
+		se->vprot = se->deadline;
 }
 
 static inline bool protect_slice(struct sched_entity *se)
 {
-	return se->vlag == se->deadline;
+	return ((s64)(se->vprot - se->vruntime) > 0);
 }
 
 static inline void cancel_protect_slice(struct sched_entity *se)
 {
 	if (protect_slice(se))
-		se->vlag = se->deadline + 1;
+		se->vprot = se->vruntime;
 }
 
 /*
@@ -937,7 +948,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 	if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
 		curr = NULL;
 
-	if (sched_feat(RUN_TO_PARITY) && curr && protect_slice(curr))
+	if (curr && protect_slice(curr))
 		return curr;
 
 	/* Pick the leftmost entity if it's eligible */
@@ -1156,11 +1167,8 @@ static inline void update_curr_task(struct task_struct *p, s64 delta_exec)
 	cgroup_account_cputime(p, delta_exec);
 }
 
-static inline bool did_preempt_short(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+static inline bool resched_next_quantum(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-	if (!sched_feat(PREEMPT_SHORT))
-		return false;
-
 	if (protect_slice(curr))
 		return false;
 
@@ -1248,7 +1256,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (cfs_rq->nr_queued == 1)
 		return;
 
-	if (resched || did_preempt_short(cfs_rq, curr)) {
+	if (resched || resched_next_quantum(cfs_rq, curr)) {
 		resched_curr_lazy(rq);
 		clear_buddies(cfs_rq, curr);
 	}
-- 
2.43.0

Re: [PATCH v3 2/6] sched/fair: Fix NO_RUN_TO_PARITY case

Posted by Peter Zijlstra 3 months ago

On Tue, Jul 08, 2025 at 06:56:26PM +0200, Vincent Guittot wrote:

>  static inline void set_protect_slice(struct sched_entity *se)
>  {
> -	se->vlag = se->deadline;
> +	u64 quantum = se->slice;
> +
> +	if (!sched_feat(RUN_TO_PARITY))
> +		quantum = min(quantum, normalized_sysctl_sched_base_slice);
> +
> +	if (quantum != se->slice)
> +		se->vprot = min_vruntime(se->deadline, se->vruntime + calc_delta_fair(quantum, se));
> +	else
> +		se->vprot = se->deadline;
>  }

I've done s/quantum/slice/ on the whole series. In the end this thing:

> +static inline bool resched_next_quantum(struct cfs_rq *cfs_rq, struct sched_entity *curr)

is gone, and *_protect_slice() has slice in the name, and it's mostly
assigned from slice-named variables.

Final form ends up looking like so:

/*
 * Compute and store in se->vprot the vruntime up to which @se is protected.
 *
 * @cfs_rq: runqueue consulted for the minimum slice when RUN_TO_PARITY is set.
 * @se:     entity whose protection limit (se->vprot) is written.
 *
 * The protection limit is never later than se->deadline.
 */
static inline void set_protect_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	/* Default protected length: the base slice (the !RUN_TO_PARITY case). */
	u64 slice = normalized_sysctl_sched_base_slice;
	/* Default protection limit: the entity's own deadline. */
	u64 vprot = se->deadline;

	/*
	 * With RUN_TO_PARITY the protected length comes from the runqueue.
	 * NOTE(review): cfs_rq_min_slice() is defined elsewhere; presumably it
	 * returns the smallest slice among the entities on @cfs_rq -- confirm.
	 */
	if (sched_feat(RUN_TO_PARITY))
		slice = cfs_rq_min_slice(cfs_rq);

	/* Never protect for longer than the entity's own slice. */
	slice = min(slice, se->slice);
	/*
	 * Only when the protected length was shortened does the limit move:
	 * it becomes the earlier of the deadline and vruntime plus the
	 * calc_delta_fair() result for that length.
	 * NOTE(review): calc_delta_fair() presumably scales a wall-clock
	 * duration into virtual time for @se -- confirm against its definition.
	 */
	if (slice != se->slice)
		vprot = min_vruntime(vprot, se->vruntime + calc_delta_fair(slice, se));

	se->vprot = vprot;
}

I'll run a few compiles and then push out to queue/sched/core (and stick
the ttwu bits in queue/sched/ttwu -- as I should've done earlier).

Re: [PATCH v3 2/6] sched/fair: Fix NO_RUN_TO_PARITY case

Posted by Vincent Guittot 3 months ago

On Wed, 9 Jul 2025 at 11:17, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Tue, Jul 08, 2025 at 06:56:26PM +0200, Vincent Guittot wrote:
>
> >  static inline void set_protect_slice(struct sched_entity *se)
> >  {
> > -     se->vlag = se->deadline;
> > +     u64 quantum = se->slice;
> > +
> > +     if (!sched_feat(RUN_TO_PARITY))
> > +             quantum = min(quantum, normalized_sysctl_sched_base_slice);
> > +
> > +     if (quantum != se->slice)
> > +             se->vprot = min_vruntime(se->deadline, se->vruntime + calc_delta_fair(quantum, se));
> > +     else
> > +             se->vprot = se->deadline;
> >  }
>
> I've done s/quantum/slice/ on the whole series. In the end this thing:
>
> > +static inline bool resched_next_quantum(struct cfs_rq *cfs_rq, struct sched_entity *curr)
>
> is gone, and *_protect_slice() has slice in the name, and it's mostly
> assigned from slice-named variables.
>
> Final form ends up looking like so:
>
> static inline void set_protect_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
> {
>         u64 slice = normalized_sysctl_sched_base_slice;
>         u64 vprot = se->deadline;
>
>         if (sched_feat(RUN_TO_PARITY))
>                 slice = cfs_rq_min_slice(cfs_rq);
>
>         slice = min(slice, se->slice);
>         if (slice != se->slice)
>                 vprot = min_vruntime(vprot, se->vruntime + calc_delta_fair(slice, se));
>
>         se->vprot = vprot;
> }

ok, looks good to me

>
> I'll run a few compiles and then push out to queue/sched/core (and stick
> the ttwu bits in queue/sched/ttwu -- as I should've done earlier).

[tip: sched/core] sched/fair: Fix NO_RUN_TO_PARITY case

Posted by tip-bot2 for Vincent Guittot 2 months, 4 weeks ago

The following commit has been merged into the sched/core branch of tip:

Commit-ID:     74eec63661d46a7153d04c2e0249eeb76cc76d44
Gitweb:        https://git.kernel.org/tip/74eec63661d46a7153d04c2e0249eeb76cc76d44
Author:        Vincent Guittot <vincent.guittot@linaro.org>
AuthorDate:    Tue, 08 Jul 2025 18:56:26 +02:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Wed, 09 Jul 2025 13:40:22 +02:00

sched/fair: Fix NO_RUN_TO_PARITY case

EEVDF expects the scheduler to allocate a time quantum to the selected
entity and then pick a new entity for the next quantum.
Although this notion of time quantum is not strictly doable in our case,
we can ensure a minimum runtime for each task most of the time and pick a
new entity after a minimum time has elapsed.
Reuse the slice protection of run to parity to ensure such runtime
quantum.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20250708165630.1948751-3-vincent.guittot@linaro.org
---
 include/linux/sched.h | 10 +++++++++-
 kernel/sched/fair.c   | 31 ++++++++++++++++++++-----------
 2 files changed, 29 insertions(+), 12 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4802fcf..5592138 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -583,7 +583,15 @@ struct sched_entity {
 	u64				sum_exec_runtime;
 	u64				prev_sum_exec_runtime;
 	u64				vruntime;
-	s64				vlag;
+	union {
+		/*
+		 * When !@on_rq this field is vlag.
+		 * When cfs_rq->curr == se (which implies @on_rq)
+		 * this field is vprot. See protect_slice().
+		 */
+		s64                     vlag;
+		u64                     vprot;
+	};
 	u64				slice;
 
 	u64				nr_migrations;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 43fe5c8..8d288df 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -882,23 +882,35 @@ struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
 }
 
 /*
- * HACK, stash a copy of deadline at the point of pick in vlag,
- * which isn't used until dequeue.
+ * Set the vruntime up to which an entity can run before looking
+ * for another entity to pick.
+ * In case of run to parity, we protect the entity up to its deadline.
+ * When run to parity is disabled, we give a minimum quantum to the running
+ * entity to ensure progress.
  */
 static inline void set_protect_slice(struct sched_entity *se)
 {
-	se->vlag = se->deadline;
+	u64 slice = se->slice;
+	u64 vprot = se->deadline;
+
+	if (!sched_feat(RUN_TO_PARITY))
+		slice = min(slice, normalized_sysctl_sched_base_slice);
+
+	if (slice != se->slice)
+		vprot = min_vruntime(vprot, se->vruntime + calc_delta_fair(slice, se));
+
+	se->vprot = vprot;
 }
 
 static inline bool protect_slice(struct sched_entity *se)
 {
-	return se->vlag == se->deadline;
+	return ((s64)(se->vprot - se->vruntime) > 0);
 }
 
 static inline void cancel_protect_slice(struct sched_entity *se)
 {
 	if (protect_slice(se))
-		se->vlag = se->deadline + 1;
+		se->vprot = se->vruntime;
 }
 
 /*
@@ -937,7 +949,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
 	if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
 		curr = NULL;
 
-	if (sched_feat(RUN_TO_PARITY) && curr && protect_slice(curr))
+	if (curr && protect_slice(curr))
 		return curr;
 
 	/* Pick the leftmost entity if it's eligible */
@@ -1156,11 +1168,8 @@ static inline void update_curr_task(struct task_struct *p, s64 delta_exec)
 	cgroup_account_cputime(p, delta_exec);
 }
 
-static inline bool did_preempt_short(struct cfs_rq *cfs_rq, struct sched_entity *curr)
+static inline bool resched_next_slice(struct cfs_rq *cfs_rq, struct sched_entity *curr)
 {
-	if (!sched_feat(PREEMPT_SHORT))
-		return false;
-
 	if (protect_slice(curr))
 		return false;
 
@@ -1248,7 +1257,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	if (cfs_rq->nr_queued == 1)
 		return;
 
-	if (resched || did_preempt_short(cfs_rq, curr)) {
+	if (resched || resched_next_slice(cfs_rq, curr)) {
 		resched_curr_lazy(rq);
 		clear_buddies(cfs_rq, curr);
 	}