Group the mostly-read fields of struct sched_entity into the first
cacheline when `CONFIG_FAIR_GROUP_SCHED` is set. The fields added by
`CONFIG_FAIR_GROUP_SCHED` are moved to the first cacheline because they
are frequently accessed and mostly read. Currently, these cfs cgroup
scheduling fields are placed on a separate cacheline from the hot
fields `load`, `on_rq` and `vruntime`. Although `depth` is not as hot
as the other fields, it is kept in the group to avoid breaking the
#ifdef boundaries.
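As an illustration (not part of this patch), the grouping relies on the
__cacheline_group_begin_aligned()/__cacheline_group_end_aligned()
markers from <linux/cache.h>; the struct below is a hypothetical sketch
of the pattern, not the sched_entity layout itself:

	struct example {
		/* zero-size marker, aligned to SMP_CACHE_BYTES */
		__cacheline_group_begin_aligned(hot);
		unsigned long	a;	/* read-mostly, hot-path field */
		unsigned long	b;	/* read-mostly, hot-path field */
		__cacheline_group_end_aligned(hot);
		unsigned long	cold;	/* rarely accessed, outside the group */
	};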
Also add a compile-time check, enabled only when
`CONFIG_FAIR_GROUP_SCHED` is set, that verifies the placement of the
hot fields.
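For reference, CACHELINE_ASSERT_GROUP_MEMBER() (from <linux/cache.h>)
reduces to a BUILD_BUG_ON() that the member lies between the group's
begin and end markers, roughly:

	BUILD_BUG_ON(!(offsetof(TYPE, MEMBER) >=
		       offsetofend(TYPE, __cacheline_group_begin__##GROUP) &&
		       offsetofend(TYPE, MEMBER) <=
		       offsetof(TYPE, __cacheline_group_end__##GROUP)));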
Signed-off-by: Zecheng Li <zecheng@google.com>
---
include/linux/sched.h | 37 ++++++++++++++++++++-----------------
kernel/sched/core.c | 20 ++++++++++++++++++++
2 files changed, 40 insertions(+), 17 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c15365a30c0..e9f58254999d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -545,40 +545,43 @@ struct sched_statistics {
} ____cacheline_aligned;
struct sched_entity {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ /* Group the read-mostly hot fields of sched_entity in one cacheline */
+ __cacheline_group_begin_aligned(hot);
+ struct sched_entity *parent;
+ /* rq on which this entity is (to be) queued: */
+ struct cfs_rq *cfs_rq;
+ /* rq "owned" by this entity/group: */
+ struct cfs_rq *my_q;
+ /* cached value of my_q->h_nr_running */
+ unsigned long runnable_weight;
+ int depth;
+#endif
+ unsigned char on_rq;
+ unsigned char sched_delayed;
+ unsigned char rel_deadline;
+ unsigned char custom_slice;
/* For load-balancing: */
struct load_weight load;
+ u64 vruntime;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ __cacheline_group_end_aligned(hot);
+#endif
struct rb_node run_node;
u64 deadline;
u64 min_vruntime;
u64 min_slice;
struct list_head group_node;
- unsigned char on_rq;
- unsigned char sched_delayed;
- unsigned char rel_deadline;
- unsigned char custom_slice;
- /* hole */
u64 exec_start;
u64 sum_exec_runtime;
u64 prev_sum_exec_runtime;
- u64 vruntime;
s64 vlag;
u64 slice;
u64 nr_migrations;
-#ifdef CONFIG_FAIR_GROUP_SCHED
- int depth;
- struct sched_entity *parent;
- /* rq on which this entity is (to be) queued: */
- struct cfs_rq *cfs_rq;
- /* rq "owned" by this entity/group: */
- struct cfs_rq *my_q;
- /* cached value of my_q->h_nr_running */
- unsigned long runnable_weight;
-#endif
-
#ifdef CONFIG_SMP
/*
* Per entity load average tracking.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84ee289d98d7..58bcd7d55eca 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8474,6 +8474,7 @@ static struct kmem_cache *task_group_cache __ro_after_init;
#endif
static void __init cfs_rq_struct_check(void);
+static void __init sched_entity_struct_check(void);
void __init sched_init(void)
{
@@ -8492,6 +8493,7 @@ void __init sched_init(void)
BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
#endif
cfs_rq_struct_check();
+ sched_entity_struct_check();
wait_bit_init();
#ifdef CONFIG_FAIR_GROUP_SCHED
@@ -10755,3 +10757,21 @@ static void __init cfs_rq_struct_check(void)
#endif
#endif
}
+
+static void __init sched_entity_struct_check(void)
+{
+ /*
+ * The compile-time check is only enabled with CONFIG_FAIR_GROUP_SCHED.
+ * We care about the placement of the seven hottest fields below.
+ */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, parent);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, cfs_rq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, my_q);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot,
+ runnable_weight);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, on_rq);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, load);
+ CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, vruntime);
+#endif
+}
--
2.49.0