[PATCH v7 1/3] sched/fair: Co-locate cfs_rq and sched_entity in cfs_tg_state

Zecheng Li posted 3 patches 3 weeks, 1 day ago
There is a newer version of this series
[PATCH v7 1/3] sched/fair: Co-locate cfs_rq and sched_entity in cfs_tg_state
Posted by Zecheng Li 3 weeks, 1 day ago
From: Zecheng Li <zecheng@google.com>

Improve data locality and reduce pointer chasing by allocating struct
cfs_rq and struct sched_entity together for non-root task groups. This
is achieved by introducing a new combined struct cfs_tg_state that
holds both objects in a single allocation.

This patch:

- Introduces struct cfs_tg_state that embeds cfs_rq, sched_entity, and
sched_statistics together in a single structure.

- Updates __schedstats_from_se() in stats.h to use cfs_tg_state for
accessing sched_statistics from a group sched_entity.

- Modifies alloc_fair_sched_group() and free_fair_sched_group() to
allocate and free the new struct as a single unit.

- Modifies the per-CPU pointers in task_group->se and task_group->cfs_rq
to point to the members in the new combined structure.

Signed-off-by: Zecheng Li <zecheng@google.com>
Signed-off-by: Zecheng Li <zli94@ncsu.edu>
---
 kernel/sched/fair.c  | 25 +++++++++++--------------
 kernel/sched/sched.h | 12 ++++++++++++
 kernel/sched/stats.h |  9 +--------
 3 files changed, 24 insertions(+), 22 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 04993c763a06..0897dab69236 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -13617,10 +13617,11 @@ void free_fair_sched_group(struct task_group *tg)
 	int i;
 
 	for_each_possible_cpu(i) {
-		if (tg->cfs_rq)
-			kfree(tg->cfs_rq[i]);
-		if (tg->se)
-			kfree(tg->se[i]);
+		if (tg->cfs_rq && tg->cfs_rq[i]) {
+			struct cfs_tg_state *state =
+				container_of(tg->cfs_rq[i], struct cfs_tg_state, cfs_rq);
+			kfree(state);
+		}
 	}
 
 	kfree(tg->cfs_rq);
@@ -13629,6 +13630,7 @@ void free_fair_sched_group(struct task_group *tg)
 
 int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 {
+	struct cfs_tg_state *state;
 	struct sched_entity *se;
 	struct cfs_rq *cfs_rq;
 	int i;
@@ -13645,16 +13647,13 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 	init_cfs_bandwidth(tg_cfs_bandwidth(tg), tg_cfs_bandwidth(parent));
 
 	for_each_possible_cpu(i) {
-		cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
-				      GFP_KERNEL, cpu_to_node(i));
-		if (!cfs_rq)
+		state = kzalloc_node(sizeof(*state),
+				     GFP_KERNEL, cpu_to_node(i));
+		if (!state)
 			goto err;
 
-		se = kzalloc_node(sizeof(struct sched_entity_stats),
-				  GFP_KERNEL, cpu_to_node(i));
-		if (!se)
-			goto err_free_rq;
-
+		cfs_rq = &state->cfs_rq;
+		se = &state->se;
 		init_cfs_rq(cfs_rq);
 		init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
 		init_entity_runnable_average(se);
@@ -13662,8 +13661,6 @@ int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
 
 	return 1;
 
-err_free_rq:
-	kfree(cfs_rq);
 err:
 	return 0;
 }
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 58c9d244f12b..50b37ed2f7d6 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2191,6 +2191,18 @@ static inline struct task_group *task_group(struct task_struct *p)
 	return p->sched_task_group;
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+/*
+ * Defined here to be available before stats.h is included, since
+ * stats.h has dependencies on things defined later in this file.
+ */
+struct cfs_tg_state {
+	struct cfs_rq		cfs_rq;
+	struct sched_entity	se;
+	struct sched_statistics	stats;
+} __no_randomize_layout;
+#endif
+
 /* Change a task's cfs_rq and parent entity if it moves across CPUs/groups */
 static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
 {
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index c903f1a42891..63b9a800a354 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -89,19 +89,12 @@ static inline void rq_sched_info_depart  (struct rq *rq, unsigned long long delt
 
 #endif /* CONFIG_SCHEDSTATS */
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-struct sched_entity_stats {
-	struct sched_entity     se;
-	struct sched_statistics stats;
-} __no_randomize_layout;
-#endif
-
 static inline struct sched_statistics *
 __schedstats_from_se(struct sched_entity *se)
 {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (!entity_is_task(se))
-		return &container_of(se, struct sched_entity_stats, se)->stats;
+		return &container_of(se, struct cfs_tg_state, se)->stats;
 #endif
 	return &task_of(se)->stats;
 }
-- 
2.52.0
Re: [PATCH v7 1/3] sched/fair: Co-locate cfs_rq and sched_entity in cfs_tg_state
Posted by K Prateek Nayak 3 weeks ago
Hello Zecheng,

On 1/18/2026 9:04 AM, Zecheng Li wrote:
> @@ -13617,10 +13617,11 @@ void free_fair_sched_group(struct task_group *tg)
>  	int i;
>  
>  	for_each_possible_cpu(i) {
> -		if (tg->cfs_rq)
> -			kfree(tg->cfs_rq[i]);
> -		if (tg->se)
> -			kfree(tg->se[i]);
> +		if (tg->cfs_rq && tg->cfs_rq[i]) {

nit. Since cfs_tg_state now uses __no_randomize_layout and "cfs_rq" is
its first member, you can just do kfree(tg->cfs_rq[i]), since that
pointer is also the beginning of the cfs_tg_state allocation.

This is the same way kfree() on tg->se[i] previously freed the entire
"sched_entity_stats".

I know this is a transient change, since you later switch to using
free_percpu(), but it would still be nice to have a simplified hunk here.

> +			struct cfs_tg_state *state =
> +				container_of(tg->cfs_rq[i], struct cfs_tg_state, cfs_rq);
> +			kfree(state);
> +		}
>  	}
>  
>  	kfree(tg->cfs_rq);

-- 
Thanks and Regards,
Prateek