[PATCH v2] sched/fair: Prevent negative lag increase during delayed dequeue

The delayed dequeue feature aims to reduce the negative lag of a
dequeued task while it sleeps, but it can happen that newly enqueued
tasks move the avg vruntime backward and so increase that task's
negative lag. When the delayed dequeued task wakes up, it has more
negative lag than if it had been dequeued immediately, or than other
tasks that were dequeued just before these new enqueues.
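
As a rough user-space sketch of the mechanism (toy weights, vruntime
values and helper names, all made up for illustration, not kernel
code): the lag of an entity is V - se->vruntime, where V is the
load-weighted average vruntime that avg_vruntime() tracks, so
enqueueing a task below V drags V backward and deepens the negative
lag of the still-queued delayed entity:

  #include <stdio.h>

  struct toy_se { long vruntime; long weight; };

  /* What avg_vruntime() computes, conceptually: V = sum(w_i*v_i)/sum(w_i) */
  static long toy_avg_vruntime(const struct toy_se *rq, int nr)
  {
  	long sum = 0, load = 0;

  	for (int i = 0; i < nr; i++) {
  		sum  += rq[i].vruntime * rq[i].weight;
  		load += rq[i].weight;
  	}
  	return sum / load;
  }

  int main(void)
  {
  	/* rq[0] is the delayed dequeue entity; it ran ahead of the pack. */
  	struct toy_se rq[3] = { { 120, 1024 }, { 90, 1024 } };
  	int nr = 2;
  	long V = toy_avg_vruntime(rq, nr);

  	printf("before: V=%ld lag=%ld\n", V, V - rq[0].vruntime); /* 105, -15 */

  	/* A new task is enqueued behind V and drags the average backward. */
  	rq[nr++] = (struct toy_se){ 60, 1024 };
  	V = toy_avg_vruntime(rq, nr);

  	printf("after:  V=%ld lag=%ld\n", V, V - rq[0].vruntime); /*  90, -30 */
  	return 0;
  }

The delayed entity's negative lag has doubled without it having run at
all.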

Ensure that the negative lag of a delayed dequeued task doesn't
increase during its delayed dequeue phase, while it waits for its
negative lag to disappear. Similarly, remove any positive lag that the
delayed dequeued task could have gained during this period.
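
A minimal user-space sketch of the clamp this patch applies
(simplified types and a stand-in clamp helper; the real code is in
update_entity_lag() in the diff below):

  #include <stdio.h>

  typedef long long s64;
  #define S64_MAX 0x7fffffffffffffffLL

  static s64 clamp_s64(s64 val, s64 lo, s64 hi)
  {
  	return val < lo ? lo : val > hi ? hi : val;
  }

  int main(void)
  {
  	int delay_zero = 1;	/* stands in for sched_feat(DELAY_ZERO) */
  	s64 se_vlag = -15;	/* lag stored when the se got delayed (< 0) */
  	s64 hi = delay_zero ? 0 : S64_MAX;

  	/* New enqueues moved V backward: the freshly computed lag is -30,
  	 * but the se keeps the -15 it entered the delayed phase with. */
  	printf("%lld\n", clamp_s64(-30, se_vlag, hi));	/* -15 */

  	/* V moved past the se while it was delayed: raw lag is +5, but
  	 * with DELAY_ZERO any positive lag gained while delayed is cut. */
  	printf("%lld\n", clamp_s64(+5, se_vlag, hi));	/* 0 */
  	return 0;
  }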

Short slice tasks are particularly impacted on overloaded systems.

Tested on a Snapdragon RB5:

hackbench -T -p -l 16000000 -g 2 1> /dev/null &
cyclictest -t 1 -i 2777 -D 333 --policy=fair --mlock  -h 20000 -q
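
That is, hackbench overloads the CPUs with 2 groups of threaded pairs
exchanging messages over pipes, while cyclictest samples the wakeup
latency of a single SCHED_OTHER thread every ~2.8ms for 333 seconds.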

The scheduling latency of cyclictest is:

                       tip/sched/core  tip/sched/core    +this patch
cyclictest slice  (ms)  2.8 (default)        8               8
hackbench slice   (ms)  2.8 (default)       20              20
Total Samples          |   115632          119733          119806
Average           (us) |      364              64 (-82%)       61 (- 5%)
Median (P50)      (us) |       60              56 (- 7%)       56 (  0%)
90th Percentile   (us) |     1166              62 (-95%)       62 (  0%)
99th Percentile   (us) |     4192              73 (-98%)       72 (- 1%)
99.9th Percentile (us) |     8528            2707 (-68%)     1300 (-52%)
Maximum           (us) |    17735           14273 (-20%)     13525 (- 5%)

Each percentage is relative to the column to its left.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---

Since v1:
- Embedded the check on the lag evolution of delayed dequeue entities
  in update_entity_lag() so that all cases are covered.

 kernel/sched/fair.c | 53 ++++++++++++++++++++++++++-------------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 226509231e67..c1ffe86bf78d 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -840,11 +840,30 @@ static s64 entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se, u64 avrunt
 	return clamp(vlag, -limit, limit);
 }
 
-static void update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
+/*
+ * Delayed dequeue aims to reduce the negative lag of a dequeued task.
+ * While updating the lag of an entity, check that negative lag didn't increase
+ * during the delayed dequeue period which would be unfair.
+ * Similarly, check that the entity didn't gain positive lag when DELAY_ZERO is
+ * set.
+ *
+ * Return true if the lag has been adjusted.
+ */
+static bool update_entity_lag(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
+	s64 vlag;
+
 	WARN_ON_ONCE(!se->on_rq);
 
-	se->vlag = entity_lag(cfs_rq, se, avg_vruntime(cfs_rq));
+	vlag = entity_lag(cfs_rq, se, avg_vruntime(cfs_rq));
+
+	if (se->sched_delayed)
+		/* previous vlag < 0 otherwise se would not be delayed */
+		se->vlag = clamp(vlag, se->vlag, sched_feat(DELAY_ZERO) ? 0 : S64_MAX);
+	else
+		se->vlag = vlag;
+
+	return (vlag != se->vlag);
 }
 
 /*
@@ -5563,13 +5582,6 @@ static void clear_delayed(struct sched_entity *se)
 	}
 }
 
-static inline void finish_delayed_dequeue_entity(struct sched_entity *se)
-{
-	clear_delayed(se);
-	if (sched_feat(DELAY_ZERO) && se->vlag > 0)
-		se->vlag = 0;
-}
-
 static bool
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
@@ -5595,6 +5607,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 		if (sched_feat(DELAY_DEQUEUE) && delay &&
 		    !entity_eligible(cfs_rq, se)) {
 			update_load_avg(cfs_rq, se, 0);
+			update_entity_lag(cfs_rq, se);
 			set_delayed(se);
 			return false;
 		}
@@ -5634,7 +5647,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	update_cfs_group(se);
 
 	if (flags & DEQUEUE_DELAYED)
-		finish_delayed_dequeue_entity(se);
+		clear_delayed(se);
 
 	if (cfs_rq->nr_queued == 0) {
 		update_idle_cfs_rq_clock_pelt(cfs_rq);
@@ -7088,18 +7101,14 @@ requeue_delayed_entity(struct sched_entity *se)
 	WARN_ON_ONCE(!se->sched_delayed);
 	WARN_ON_ONCE(!se->on_rq);
 
-	if (sched_feat(DELAY_ZERO)) {
-		update_entity_lag(cfs_rq, se);
-		if (se->vlag > 0) {
-			cfs_rq->nr_queued--;
-			if (se != cfs_rq->curr)
-				__dequeue_entity(cfs_rq, se);
-			se->vlag = 0;
-			place_entity(cfs_rq, se, 0);
-			if (se != cfs_rq->curr)
-				__enqueue_entity(cfs_rq, se);
-			cfs_rq->nr_queued++;
-		}
+	if (update_entity_lag(cfs_rq, se)) {
+		cfs_rq->nr_queued--;
+		if (se != cfs_rq->curr)
+			__dequeue_entity(cfs_rq, se);
+		place_entity(cfs_rq, se, 0);
+		if (se != cfs_rq->curr)
+			__enqueue_entity(cfs_rq, se);
+		cfs_rq->nr_queued++;
 	}
 
 	update_load_avg(cfs_rq, se, 0);
-- 
2.43.0