[PATCH 6/9] sched/fair: Removed unused cfs_rq.h_nr_delayed

Vincent Guittot posted 9 patches 1 year, 2 months ago
There is a newer version of this series
[PATCH 6/9] sched/fair: Removed unused cfs_rq.h_nr_delayed
Posted by Vincent Guittot 1 year, 2 months ago
h_nr_delayed is not used anymore. We now have:
- h_nr_running, which tracks tasks that are ready to run
- h_nr_enqueued, which tracks enqueued tasks that are either ready to run
  or in delayed dequeue

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 kernel/sched/debug.c |  1 -
 kernel/sched/fair.c  | 41 ++++++++++++++---------------------------
 kernel/sched/sched.h |  1 -
 3 files changed, 14 insertions(+), 29 deletions(-)

diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 8f5273043c16..84f623b9d4af 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -846,7 +846,6 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 	SEQ_printf(m, "  .%-30s: %d\n", "nr_enqueued", cfs_rq->nr_enqueued);
 	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_running", cfs_rq->h_nr_running);
 	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_enqueued", cfs_rq->h_nr_enqueued);
-	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_delayed", cfs_rq->h_nr_delayed);
 	SEQ_printf(m, "  .%-30s: %d\n", "h_nr_idle",
 			cfs_rq->h_nr_idle);
 	SEQ_printf(m, "  .%-30s: %ld\n", "load", cfs_rq->load.weight);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index a96a771d8e61..1b4f1b610543 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -5468,21 +5468,18 @@ static void set_delayed(struct sched_entity *se)
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
 		cfs_rq->h_nr_running--;
-		cfs_rq->h_nr_delayed++;
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 	}
 }
 
-static void clear_delayed(struct sched_entity *se, bool running)
+static void clear_delayed(struct sched_entity *se)
 {
 	se->sched_delayed = 0;
 	for_each_sched_entity(se) {
 		struct cfs_rq *cfs_rq = cfs_rq_of(se);
 
-		if (running)
-			cfs_rq->h_nr_running++;
-		cfs_rq->h_nr_delayed--;
+		cfs_rq->h_nr_running++;
 		if (cfs_rq_throttled(cfs_rq))
 			break;
 	}
@@ -5490,7 +5487,7 @@ static void clear_delayed(struct sched_entity *se, bool running)
 
 static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
 {
-	clear_delayed(se, false);
+	se->sched_delayed = 0;
 	if (sched_feat(DELAY_ZERO) && se->vlag > 0)
 		se->vlag = 0;
 }
@@ -5934,7 +5931,7 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
-	long running_delta, enqueued_delta, idle_delta, delayed_delta, dequeue = 1;
+	long running_delta, enqueued_delta, idle_delta, dequeue = 1;
 	long rq_h_nr_enqueued = rq->cfs.h_nr_enqueued;
 
 	raw_spin_lock(&cfs_b->lock);
@@ -5968,7 +5965,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 	running_delta = cfs_rq->h_nr_running;
 	enqueued_delta = cfs_rq->h_nr_enqueued;
 	idle_delta = cfs_rq->h_nr_idle;
-	delayed_delta = cfs_rq->h_nr_delayed;
 	for_each_sched_entity(se) {
 		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 		int flags;
@@ -5993,7 +5989,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 		qcfs_rq->h_nr_running -= running_delta;
 		qcfs_rq->h_nr_enqueued -= enqueued_delta;
 		qcfs_rq->h_nr_idle -= idle_delta;
-		qcfs_rq->h_nr_delayed -= delayed_delta;
 
 		if (qcfs_rq->load.weight) {
 			/* Avoid re-evaluating load for this entity: */
@@ -6017,7 +6012,6 @@ static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
 		qcfs_rq->h_nr_running -= running_delta;
 		qcfs_rq->h_nr_enqueued -= enqueued_delta;
 		qcfs_rq->h_nr_idle -= idle_delta;
-		qcfs_rq->h_nr_delayed -= delayed_delta;
 	}
 
 	/* At this point se is NULL and we are at root level*/
@@ -6043,7 +6037,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	struct rq *rq = rq_of(cfs_rq);
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
 	struct sched_entity *se;
-	long running_delta, enqueued_delta, idle_delta, delayed_delta;
+	long running_delta, enqueued_delta, idle_delta;
 	long rq_h_nr_enqueued = rq->cfs.h_nr_enqueued;
 
 	se = cfs_rq->tg->se[cpu_of(rq)];
@@ -6080,7 +6074,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 	running_delta = cfs_rq->h_nr_running;
 	enqueued_delta = cfs_rq->h_nr_enqueued;
 	idle_delta = cfs_rq->h_nr_idle;
-	delayed_delta = cfs_rq->h_nr_delayed;
 	for_each_sched_entity(se) {
 		struct cfs_rq *qcfs_rq = cfs_rq_of(se);
 
@@ -6099,7 +6092,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 		qcfs_rq->h_nr_running += running_delta;
 		qcfs_rq->h_nr_enqueued += enqueued_delta;
 		qcfs_rq->h_nr_idle += idle_delta;
-		qcfs_rq->h_nr_delayed += delayed_delta;
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(qcfs_rq))
@@ -6118,7 +6110,6 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 		qcfs_rq->h_nr_running += running_delta;
 		qcfs_rq->h_nr_enqueued += enqueued_delta;
 		qcfs_rq->h_nr_idle += idle_delta;
-		qcfs_rq->h_nr_delayed += delayed_delta;
 
 		/* end evaluation on encountering a throttled cfs_rq */
 		if (cfs_rq_throttled(qcfs_rq))
@@ -6972,7 +6963,7 @@ requeue_delayed_entity(struct sched_entity *se)
 	}
 
 	update_load_avg(cfs_rq, se, 0);
-	clear_delayed(se, true);
+	clear_delayed(se);
 }
 
 /*
@@ -6986,7 +6977,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 	int h_nr_idle = task_has_idle_policy(p);
-	int h_nr_delayed = 0;
+	int se_delayed = 0;
 	int task_new = !(flags & ENQUEUE_WAKEUP);
 	int rq_h_nr_enqueued = rq->cfs.h_nr_enqueued;
 	u64 slice = 0;
@@ -7014,7 +7005,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
 
 	if (task_new)
-		h_nr_delayed = !!se->sched_delayed;
+		se_delayed = !!se->sched_delayed;
 
 	for_each_sched_entity(se) {
 		if (se->on_rq) {
@@ -7036,11 +7027,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		enqueue_entity(cfs_rq, se, flags);
 		slice = cfs_rq_min_slice(cfs_rq);
 
-		if (!h_nr_delayed)
+		if (!se_delayed)
 			cfs_rq->h_nr_running++;
 		cfs_rq->h_nr_enqueued++;
 		cfs_rq->h_nr_idle += h_nr_idle;
-		cfs_rq->h_nr_delayed += h_nr_delayed;
 
 		if (cfs_rq_is_idle(cfs_rq))
 			h_nr_idle = 1;
@@ -7062,11 +7052,10 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 		se->slice = slice;
 		slice = cfs_rq_min_slice(cfs_rq);
 
-		if (!h_nr_delayed)
+		if (!se_delayed)
 			cfs_rq->h_nr_running++;
 		cfs_rq->h_nr_enqueued++;
 		cfs_rq->h_nr_idle += h_nr_idle;
-		cfs_rq->h_nr_delayed += h_nr_delayed;
 
 		if (cfs_rq_is_idle(cfs_rq))
 			h_nr_idle = 1;
@@ -7129,7 +7118,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 	struct task_struct *p = NULL;
 	int h_nr_idle = 0;
 	int h_nr_running = 0;
-	int h_nr_delayed = 0;
+	int se_delayed = 0;
 	struct cfs_rq *cfs_rq;
 	u64 slice = 0;
 
@@ -7138,7 +7127,7 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 		h_nr_running = 1;
 		h_nr_idle = task_has_idle_policy(p);
 		if (!task_sleep && !task_delayed)
-			h_nr_delayed = !!se->sched_delayed;
+			se_delayed = !!se->sched_delayed;
 	} else {
 		cfs_rq = group_cfs_rq(se);
 		slice = cfs_rq_min_slice(cfs_rq);
@@ -7154,11 +7143,10 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 			break;
 		}
 
-		if (!h_nr_delayed)
+		if (!se_delayed)
 			cfs_rq->h_nr_running -= h_nr_running;
 		cfs_rq->h_nr_enqueued -= h_nr_running;
 		cfs_rq->h_nr_idle -= h_nr_idle;
-		cfs_rq->h_nr_delayed -= h_nr_delayed;
 
 		if (cfs_rq_is_idle(cfs_rq))
 			h_nr_idle = h_nr_running;
@@ -7195,11 +7183,10 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
 		se->slice = slice;
 		slice = cfs_rq_min_slice(cfs_rq);
 
-		if (!h_nr_delayed)
+		if (!se_delayed)
 			cfs_rq->h_nr_running -= h_nr_running;
 		cfs_rq->h_nr_enqueued -= h_nr_running;
 		cfs_rq->h_nr_idle -= h_nr_idle;
-		cfs_rq->h_nr_delayed -= h_nr_delayed;
 
 		if (cfs_rq_is_idle(cfs_rq))
 			h_nr_idle = h_nr_running;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 7d99d18e8984..0b297242eb5d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -649,7 +649,6 @@ struct cfs_rq {
 	unsigned int		h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
 	unsigned int		h_nr_enqueued;
 	unsigned int		h_nr_idle; /* SCHED_IDLE */
-	unsigned int		h_nr_delayed;
 
 	s64			avg_vruntime;
 	u64			avg_load;
-- 
2.43.0
Re: [PATCH 6/9] sched/fair: Removed unused cfs_rq.h_nr_delayed
Posted by Peter Zijlstra 1 year, 2 months ago
On Thu, Nov 28, 2024 at 10:27:47AM +0100, Vincent Guittot wrote:
> h_nr_delayed is not used anymore. We now have
> - h_nr_running which tracks tasks ready to run
> - h_nr_enqueued which tracks enqueued tasks either ready to run or delayed
>   dequeue

Oh, now I see where you're going.

Let me read the lot again, because this sure as hell was a confusing
swizzle.
Re: [PATCH 6/9] sched/fair: Removed unused cfs_rq.h_nr_delayed
Posted by Peter Zijlstra 1 year, 2 months ago
On Thu, Nov 28, 2024 at 11:03:48AM +0100, Peter Zijlstra wrote:
> On Thu, Nov 28, 2024 at 10:27:47AM +0100, Vincent Guittot wrote:
> > h_nr_delayed is not used anymore. We now have
> > - h_nr_running which tracks tasks ready to run
> > - h_nr_enqueued which tracks enqueued tasks either ready to run or delayed
> >   dequeue
> 
> Oh, now I see where you're going.
> 
> Let me read the lot again, because this sure as hell was a confusing
> swizzle.

So the first patch adds h_nr_delayed.

Then confusion

Then we end up with:

 h_nr_enqueued = h_nr_running + h_nr_delayed

Where h_nr_enqueued is part of rq->nr_running (and somewhere along the
way you rename and remove some idle numbers).

Can't we structure it like:

  - add h_nr_delayed
  - rename h_nr_running to h_nr_queued
  - add h_nr_runnable = h_nr_queued - h_nr_delayed
  - use h_nr_runnable
  - remove h_nr_delayed

  - clean up idle muck


And I'm assuming this ordering is because people want h_nr_delayed
backported. Because the even more sensible order would be something
like:

 - rename h_nr_running into h_nr_queued
 - add h_nr_runnable (being h_nr_queued - h_nr_delayed, without ever
   having had h_nr_delayed).
 - use h_nr_runnable
 
 - clean up idle muck
Re: [PATCH 6/9] sched/fair: Removed unused cfs_rq.h_nr_delayed
Posted by Vincent Guittot 1 year, 2 months ago
On Thu, 28 Nov 2024 at 11:15, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Thu, Nov 28, 2024 at 11:03:48AM +0100, Peter Zijlstra wrote:
> > On Thu, Nov 28, 2024 at 10:27:47AM +0100, Vincent Guittot wrote:
> > > h_nr_delayed is not used anymore. We now have
> > > - h_nr_running which tracks tasks ready to run
> > > - h_nr_enqueued which tracks enqueued tasks either ready to run or delayed
> > >   dequeue
> >
> > Oh, now I see where you're going.
> >
> > Let me read the lot again, because this sure as hell was a confusing
> > swizzle.
>
> So the first patch adds h_nr_delayed.
>
> Then confusion

I started from your patch that adds h_nr_delayed and added, on top of it,
the steps that move to h_nr_enqueued and h_nr_running, to make the changes
easier to understand.

>
> Then we end up with:
>
>  h_nr_enqueued = h_nr_running + h_nr_delayed
>
> Where h_nr_enqueued is part of rq->nr_running (and somewhere along the
> way you rename and remove some idle numbers).
>
> Can't we structure it like:
>
>   - add h_nr_delayed
>   - rename h_nr_running to h_nr_queued
>   - add h_nr_runnable = h_nr_queued - h_nr_delayed
>   - use h_nr_runnable
>   - remove h_nr_delayed
>
>   - clean up idle muck
>

I can reorder the patches to follow the structure above.

>
> And I'm assuming this ordering is because people want h_nr_delayed
> backported. Because the even more sensible order would be something
> like:
>
>  - rename h_nr_running into h_nr_queued
>  - add h_nr_runnable (being h_nr_queued - h_nr_delayed, without ever
>    having had h_nr_delayed).
>  - use h_nr_runnable
>
>  - clean up idle muck
>
>