Since the RB-tree is now sorted by deadline, let's first try the
leftmost entity, which has the earliest virtual deadline. If that
entity is also eligible, it must be the EEVDF choice, since no other
entity can have an earlier virtual deadline. I've done some benchmarks
to see how effective this fastpath is.
All benchmarks were run inside a normal cpu cgroup in a clean
environment with cpu turbo disabled, on a dual-socket Intel Xeon(R)
Platinum 8260 machine with 2 NUMA nodes, each of which has 24C/48T.
hackbench: process/thread + pipe/socket + 1/2/4/8 groups
netperf: TCP/UDP + STREAM/RR + 24/48/72/96/192 threads
tbench: loopback 24/48/72/96/192 threads
schbench: 1/2/4/8 mthreads
Each column below shows the percentage of pick_eevdf() calls served
by that path (the rows sum to 100%):

direct: cfs_rq has only one entity
parity: RUN_TO_PARITY
fast:   O(1) fastpath
slow:   heap search

(%)          direct  parity    fast    slow
hackbench     92.95    2.02    4.91    0.12
netperf       68.08    6.60   24.18    1.14
tbench        67.55   11.22   20.61    0.62
schbench      69.91    2.65   25.73    1.71
The above results show that the O(1) fastpath serves roughly 5-26% of
the picks across these workloads, while the full heap search is left
with at most 1.71% of them, so task selection does become noticeably
more efficient.
Signed-off-by: Abel Wu <wuyun.abel@bytedance.com>
---
kernel/sched/fair.c | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 459487bf8824..a1fdd0c7a051 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -896,6 +896,7 @@ int sched_update_scaling(void)
static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
{
struct rb_node *node = cfs_rq->tasks_timeline.rb_root.rb_node;
+ struct sched_entity *se = __pick_first_entity(cfs_rq);
struct sched_entity *curr = cfs_rq->curr;
struct sched_entity *best = NULL;
@@ -904,7 +905,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
* in this cfs_rq, saving some cycles.
*/
if (cfs_rq->nr_running == 1)
- return curr && curr->on_rq ? curr : __node_2_se(node);
+ return curr && curr->on_rq ? curr : se;
if (curr && (!curr->on_rq || !entity_eligible(cfs_rq, curr)))
curr = NULL;
@@ -916,9 +917,14 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
if (sched_feat(RUN_TO_PARITY) && curr && curr->vlag == curr->deadline)
return curr;
+ /* Pick the leftmost entity if it's eligible */
+ if (se && entity_eligible(cfs_rq, se)) {
+ best = se;
+ goto found;
+ }
+
/* Heap search for the EEVD entity */
while (node) {
- struct sched_entity *se = __node_2_se(node);
struct rb_node *left = node->rb_left;
/*
@@ -931,6 +937,8 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
continue;
}
+ se = __node_2_se(node);
+
/*
* The left subtree either is empty or has no eligible
* entity, so check the current node since it is the one
@@ -943,7 +951,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
node = node->rb_right;
}
-
+found:
if (!best || (curr && entity_before(curr, best)))
best = curr;
--
2.37.3
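
For readers piecing the hunks together, below is a minimal standalone
sketch of the selection logic after this patch. It is a toy model, not
kernel code: the rb-tree is reduced to an array sorted by virtual
deadline, entity_eligible() to a plain vruntime-vs-average comparison,
and every toy_* identifier and constant is invented for illustration.

	#include <stdio.h>
	#include <stddef.h>

	struct toy_entity {
		long vruntime;	/* virtual runtime consumed so far */
		long deadline;	/* virtual deadline; queue is sorted by this */
	};

	/* Toy stand-in for entity_eligible(): not ahead of the average. */
	static int toy_eligible(const struct toy_entity *se, long avg_vruntime)
	{
		return se->vruntime <= avg_vruntime;
	}

	/*
	 * Pick the eligible entity with the earliest virtual deadline.
	 * Fastpath: the leftmost (earliest-deadline) entity wins outright
	 * when it is eligible, since nothing can have an earlier deadline.
	 * Slowpath: scan in deadline order for the first eligible entity;
	 * the linear scan stands in for the rb-tree heap search.
	 */
	static struct toy_entity *toy_pick(struct toy_entity *q, size_t n,
					   long avg_vruntime, int *slow)
	{
		size_t i;

		if (!n)
			return NULL;

		if (toy_eligible(&q[0], avg_vruntime))
			return &q[0];			/* O(1) fastpath */

		*slow = 1;
		for (i = 1; i < n; i++)			/* heap-search stand-in */
			if (toy_eligible(&q[i], avg_vruntime))
				return &q[i];

		return NULL;
	}

	int main(void)
	{
		/* sorted by deadline; the leftmost has run ahead of the average */
		struct toy_entity q[] = {
			{ .vruntime = 120, .deadline = 150 },
			{ .vruntime =  90, .deadline = 180 },
			{ .vruntime =  80, .deadline = 210 },
		};
		int slow = 0;
		struct toy_entity *pick = toy_pick(q, 3, 100, &slow);

		printf("picked deadline=%ld via %s path\n",
		       pick->deadline, slow ? "slow" : "fast");
		return 0;
	}

With this queue the leftmost entity has run ahead of the average, so
the slow path returns the deadline-180 entity; drop q[0].vruntime below
100 and the fastpath fires instead. The real pick_eevdf() additionally
handles curr, the single-entity case and RUN_TO_PARITY before reaching
either path.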
On 11/7/23 5:05 PM, Abel Wu wrote:
> [...]
> @@ -904,7 +905,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
> * in this cfs_rq, saving some cycles.
> */
> if (cfs_rq->nr_running == 1)
> - return curr && curr->on_rq ? curr : __node_2_se(node);
> + return curr && curr->on_rq ? curr : se;
Maybe we can reduce the memory footprint on curr by:

	return se ? se : curr;
On Tue, Nov 07, 2023 at 06:12:49PM +0800, Abel Wu wrote:
> > @@ -904,7 +905,7 @@ static struct sched_entity *pick_eevdf(struct cfs_rq *cfs_rq)
> >  	 * in this cfs_rq, saving some cycles.
> >  	 */
> >  	if (cfs_rq->nr_running == 1)
> > -		return curr && curr->on_rq ? curr : __node_2_se(node);
> > +		return curr && curr->on_rq ? curr : se;
> 
> Maybe we can reduce the memory footprint on curr by:
> 
> 	return se ? se : curr;

Irrespective, I think that logic makes more sense. If we know we have
but one task and the tree has a task, it must be that task; otherwise,
current must be it.

Anyway, I was still staring at the previous patch. Flipping the tree
around like that is clever. Yes, I suppose that ought to work just fine.