From nobody Mon Apr  7 14:55:04 2025
Date: Wed,  2 Apr 2025 21:29:01 +0000
In-Reply-To: <20250402212904.8866-1-zecheng@google.com>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
Mime-Version: 1.0
References: <20250402212904.8866-1-zecheng@google.com>
X-Mailer: git-send-email 2.49.0.472.ge94155a9ec-goog
Message-ID: <20250402212904.8866-2-zecheng@google.com>
Subject: [RFC PATCH 1/2] sched/fair: Reorder struct cfs_rq
From: Zecheng Li <zecheng@google.com>
To: Ingo Molnar <mingo@redhat.com>, Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
 Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>,
 Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>, Xu Liu <xliuprof@google.com>,
	Blake Jones <blakejones@google.com>, Josh Don <joshdon@google.com>,
 linux-kernel@vger.kernel.org,
	Zecheng Li <zecheng@google.com>
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

Move the hot fields of struct cfs_rq into the first two cache lines:
the first cache line holds the hottest fields, and the second holds
slightly cooler ones. With all related CONFIG options enabled, fields
originally located around the 4th and 5th cache-line offsets are also
regrouped to improve locality when executing the CFS bandwidth control
functions. Because the holes in the struct are removed, its size
shrinks by one cache line on an x86 system.
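
As a standalone illustration of the hole-removal effect (a toy struct,
not the kernel one; sizes assume an LP64 target such as x86-64),
reordering members so that same-size fields sit together removes
padding and shrinks the struct:

  #include <stdio.h>

  /*
   * A 4-byte hole follows 'flag' because 'ptr' needs 8-byte alignment,
   * and 4 bytes of tail padding follow 'count'.
   */
  struct padded {
          int   flag;
          void *ptr;
          int   count;
  };

  /* Same members, reordered: the two ints now share one 8-byte slot. */
  struct reordered {
          void *ptr;
          int   flag;
          int   count;
  };

  int main(void)
  {
          /* Typically prints "24 16" on x86-64. */
          printf("%zu %zu\n", sizeof(struct padded),
                 sizeof(struct reordered));
          return 0;
  }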

The following changes are proposed:

- Move `curr`, `rq`, `tg`, `throttle_count`, and `runtime_enabled` to
the first cache line, as they are frequently accessed and mostly read.
They are either pointers to closely related structs (`rq`, `tg`) or
fields checked as conditions (`curr`, `throttle_count`, and
`runtime_enabled`).

- `propagate` and `idle`, two frequently read fields, previously sat
in separate cache lines. Group them in cache line 2 together with the
remaining fields that used to be in cache line 1, filling the hole.

- `on_list` is often accessed together with the `throttled_clock_*`
fields in `tg_unthrottle_up()` and `tg_throttle_down()`. Move
`runtime_remaining` and `throttled_pelt_idle`, which are accessed less
frequently, to the preceding cache line so that `on_list` and the
throttle-related fields can be grouped together.

- Use the `__cacheline_group_*()` macros to delineate the logically
grouped fields for cache alignment, with compile-time checks added in
`cfs_rq_struct_check()`; a standalone sketch of the idea follows.
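
The `__cacheline_group_*()` markers and `CACHELINE_ASSERT_GROUP_MEMBER()`
used in the patch come from the kernel's cache helpers. As a rough,
standalone sketch of the same idea (illustrative names, an assumed
64-byte line size, GNU C zero-size array markers; this is not the
kernel's implementation), a group can be bracketed by markers and a
member pinned inside it at compile time:

  #include <stddef.h>

  #define CACHE_LINE 64   /* assumed L1 cache-line size */

  struct demo {
          /* zero-size marker aligned to the start of a cache line */
          char hot_begin[0] __attribute__((aligned(CACHE_LINE)));
          unsigned long      load;
          unsigned int       nr_queued;
          void              *curr;
          char hot_end[0];
          /* colder fields follow */
          unsigned long long stats[8];
  };

  /* Compilation fails if 'member' drifts out of the 'grp' group. */
  #define ASSERT_IN_GROUP(type, grp, member)                            \
          _Static_assert(offsetof(type, member) >=                      \
                         offsetof(type, grp##_begin) &&                 \
                         offsetof(type, member) +                       \
                         sizeof(((type *)0)->member) <=                 \
                         offsetof(type, grp##_end),                     \
                         #member " left the " #grp " group")

  ASSERT_IN_GROUP(struct demo, hot, curr);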

Signed-off-by: Zecheng Li <zecheng@google.com>
---
 kernel/sched/core.c  | 61 +++++++++++++++++++++++++++++++++++++-
 kernel/sched/sched.h | 70 ++++++++++++++++++++++++++++----------------
 2 files changed, 104 insertions(+), 27 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 042351c7afce..84ee289d98d7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8473,6 +8473,8 @@ LIST_HEAD(task_groups);
 static struct kmem_cache *task_group_cache __ro_after_init;
 #endif
 
+static void __init cfs_rq_struct_check(void);
+
 void __init sched_init(void)
 {
 	unsigned long ptr = 0;
@@ -8489,7 +8491,7 @@ void __init sched_init(void)
 	BUG_ON(!sched_class_above(&fair_sched_class, &ext_sched_class));
 	BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
 #endif
-
+	cfs_rq_struct_check();
 	wait_bit_init();
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -10696,3 +10698,60 @@ void sched_enq_and_set_task(struct sched_enq_and_set_ctx *ctx)
 		set_next_task(rq, ctx->p);
 }
 #endif	/* CONFIG_SCHED_CLASS_EXT */
+
+static void __init cfs_rq_struct_check(void)
+{
+	/*
+	 * The first two cache lines are hot and mostly read
+	 * except load.inv_weight
+	 */
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, load);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, nr_queued);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, h_nr_queued);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, h_nr_runnable);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, h_nr_idle);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, curr);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, rq);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, tg);
+
+#ifdef CONFIG_CFS_BANDWIDTH
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, throttle_count);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, runtime_enabled);
+#endif
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, idle);
+
+#ifdef CONFIG_SMP
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, propagate);
+#endif
+#endif
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, avg_vruntime);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, avg_load);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, min_vruntime);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, tasks_timeline);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, hot, next);
+
+	/*
+	 * This cache line groups hot fields of the throttling functions.
+	 * This group is enabled when CFS_BANDWIDTH is configured.
+	 */
+#ifdef CONFIG_CFS_BANDWIDTH
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, throttle, throttled);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, throttle, on_list);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, throttle,
+				      leaf_cfs_rq_list);
+
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, throttle, throttled_clock);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, throttle,
+				      throttled_clock_pelt);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, throttle,
+				      throttled_clock_pelt_time);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, throttle,
+				      throttled_clock_self);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct cfs_rq, throttle,
+				      throttled_clock_self_time);
+#endif
+#endif
+}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 023b844159c9..3230b09a4959 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -649,29 +649,44 @@ struct balance_callback {
 
 /* CFS-related fields in a runqueue */
 struct cfs_rq {
+	/* The first two cache lines are hot and mostly read */
+	__cacheline_group_begin_aligned(hot);
 	struct load_weight	load;
 	unsigned int		nr_queued;
 	unsigned int		h_nr_queued;       /* SCHED_{NORMAL,BATCH,IDLE} */
 	unsigned int		h_nr_runnable;     /* SCHED_{NORMAL,BATCH,IDLE} */
 	unsigned int		h_nr_idle; /* SCHED_IDLE */
+	/*
+	 * 'curr' points to currently running entity on this cfs_rq.
+	 * It is set to NULL otherwise (i.e when none are currently running).
+	 */
+	struct sched_entity	*curr;
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	struct rq		*rq;	/* CPU runqueue to which this cfs_rq is attached */
+	struct task_group	*tg;	/* group that "owns" this runqueue */
+
+#ifdef CONFIG_CFS_BANDWIDTH
+	int			throttle_count;
+	int			runtime_enabled;
+#endif
+	/* Locally cached copy of our task_group's idle value */
+	int			idle;
+
+#ifdef CONFIG_SMP
+	long			propagate;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
 
 	s64			avg_vruntime;
 	u64			avg_load;
 
 	u64			min_vruntime;
-#ifdef CONFIG_SCHED_CORE
-	unsigned int		forceidle_seq;
-	u64			min_vruntime_fi;
-#endif
 
 	struct rb_root_cached	tasks_timeline;
 
-	/*
-	 * 'curr' points to currently running entity on this cfs_rq.
-	 * It is set to NULL otherwise (i.e when none are currently running).
-	 */
-	struct sched_entity	*curr;
 	struct sched_entity	*next;
+	__cacheline_group_end_aligned(hot);
 
 #ifdef CONFIG_SMP
 	/*
@@ -692,7 +707,6 @@ struct cfs_rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	u64			last_update_tg_load_avg;
 	unsigned long		tg_load_avg_contrib;
-	long			propagate;
 	long			prop_runnable_sum;
 
 	/*
@@ -708,8 +722,19 @@ struct cfs_rq {
 #endif /* CONFIG_SMP */
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	struct rq		*rq;	/* CPU runqueue to which this cfs_rq is attached */
-
+#ifdef CONFIG_CFS_BANDWIDTH
+	s64			runtime_remaining;
+	u64			throttled_pelt_idle;
+#ifndef CONFIG_64BIT
+	u64                     throttled_pelt_idle_copy;
+#endif
+	/*
+	 * This cache line groups hot fields of the throttling functions.
+	 * This group is enabled when CFS_BANDWIDTH is configured.
+	 */
+	__cacheline_group_begin_aligned(throttle);
+	int			throttled;
+#endif /* CONFIG_CFS_BANDWIDTH */
 	/*
 	 * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in
 	 * a hierarchy). Non-leaf lrqs hold other higher schedulable entities
@@ -720,30 +745,23 @@ struct cfs_rq {
 	 */
 	int			on_list;
 	struct list_head	leaf_cfs_rq_list;
-	struct task_group	*tg;	/* group that "owns" this runqueue */
-
-	/* Locally cached copy of our task_group's idle value */
-	int			idle;
-
 #ifdef CONFIG_CFS_BANDWIDTH
-	int			runtime_enabled;
-	s64			runtime_remaining;
-
-	u64			throttled_pelt_idle;
-#ifndef CONFIG_64BIT
-	u64                     throttled_pelt_idle_copy;
-#endif
 	u64			throttled_clock;
 	u64			throttled_clock_pelt;
 	u64			throttled_clock_pelt_time;
 	u64			throttled_clock_self;
 	u64			throttled_clock_self_time;
-	int			throttled;
-	int			throttle_count;
+	__cacheline_group_end_aligned(throttle);
+
 	struct list_head	throttled_list;
 	struct list_head	throttled_csd_list;
 #endif /* CONFIG_CFS_BANDWIDTH */
 #endif /* CONFIG_FAIR_GROUP_SCHED */
+
+#ifdef CONFIG_SCHED_CORE
+	unsigned int		forceidle_seq;
+	u64			min_vruntime_fi;
+#endif
 };
 
 #ifdef CONFIG_SCHED_CLASS_EXT
-- 
2.49.0
From nobody Mon Apr  7 14:55:04 2025
Date: Wed,  2 Apr 2025 21:29:02 +0000
In-Reply-To: <20250402212904.8866-1-zecheng@google.com>
Precedence: bulk
X-Mailing-List: linux-kernel@vger.kernel.org
List-Id: <linux-kernel.vger.kernel.org>
List-Subscribe: <mailto:linux-kernel+subscribe@vger.kernel.org>
List-Unsubscribe: <mailto:linux-kernel+unsubscribe@vger.kernel.org>
Mime-Version: 1.0
References: <20250402212904.8866-1-zecheng@google.com>
X-Mailer: git-send-email 2.49.0.472.ge94155a9ec-goog
Message-ID: <20250402212904.8866-3-zecheng@google.com>
Subject: [RFC PATCH 2/2] sched/fair: Reorder struct sched_entity
From: Zecheng Li <zecheng@google.com>
To: Ingo Molnar <mingo@redhat.com>, Peter Zijlstra <peterz@infradead.org>,
	Juri Lelli <juri.lelli@redhat.com>,
 Vincent Guittot <vincent.guittot@linaro.org>
Cc: Dietmar Eggemann <dietmar.eggemann@arm.com>,
 Steven Rostedt <rostedt@goodmis.org>,
	Ben Segall <bsegall@google.com>, Mel Gorman <mgorman@suse.de>,
	Valentin Schneider <vschneid@redhat.com>, Xu Liu <xliuprof@google.com>,
	Blake Jones <blakejones@google.com>, Josh Don <joshdon@google.com>,
 linux-kernel@vger.kernel.org,
	Zecheng Li <zecheng@google.com>
Content-Transfer-Encoding: quoted-printable
Content-Type: text/plain; charset="utf-8"

Group the mostly-read fields of struct sched_entity into the first
cache line when `CONFIG_FAIR_GROUP_SCHED` is set. The fields added
under `CONFIG_FAIR_GROUP_SCHED` are moved into the first cache line
because they are frequently accessed and mostly read. Currently, these
CFS cgroup scheduling fields sit on a separate cache line from the hot
fields `load`, `on_rq` and `vruntime`. Although `depth` is not as hot
as the other fields, it is kept here to avoid splitting the #ifdef
block.

Also add a compile-time check, enabled when `CONFIG_FAIR_GROUP_SCHED`
is set, to verify the placement of the hot fields.
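
A rough, standalone sketch of this kind of check (a toy struct with
illustrative field names and an assumed 64-byte cache line; not the
kernel code): each hot member must end within the first cache line of
the struct, enforced at compile time.

  #include <stddef.h>

  #define CACHE_LINE_BYTES 64     /* assumed cache-line size */

  struct se_demo {
          void              *parent;
          void              *cfs_rq;
          void              *my_q;
          unsigned long      runnable_weight;
          int                depth;
          unsigned char      on_rq;
          unsigned long      load_weight;
          unsigned long long vruntime;
  } __attribute__((aligned(CACHE_LINE_BYTES)));

  /* Compilation fails if a listed member spills out of cache line 0. */
  #define ASSERT_FIRST_LINE(member)                                     \
          _Static_assert(offsetof(struct se_demo, member) +             \
                         sizeof(((struct se_demo *)0)->member) <=       \
                         CACHE_LINE_BYTES,                              \
                         #member " spills out of the first cache line")

  ASSERT_FIRST_LINE(parent);
  ASSERT_FIRST_LINE(runnable_weight);
  ASSERT_FIRST_LINE(vruntime);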

Signed-off-by: Zecheng Li <zecheng@google.com>
---
 include/linux/sched.h | 37 ++++++++++++++++++++-----------------
 kernel/sched/core.c   | 20 ++++++++++++++++++++
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9c15365a30c0..e9f58254999d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -545,40 +545,43 @@ struct sched_statistics {
 } ____cacheline_aligned;
 
 struct sched_entity {
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	/* Group the mostly-read hot fields of sched_entity into one cache line */
+	__cacheline_group_begin_aligned(hot);
+	struct sched_entity		*parent;
+	/* rq on which this entity is (to be) queued: */
+	struct cfs_rq			*cfs_rq;
+	/* rq "owned" by this entity/group: */
+	struct cfs_rq			*my_q;
+	/* cached value of my_q->h_nr_running */
+	unsigned long			runnable_weight;
+	int				depth;
+#endif
+	unsigned char			on_rq;
+	unsigned char			sched_delayed;
+	unsigned char			rel_deadline;
+	unsigned char			custom_slice;
 	/* For load-balancing: */
 	struct load_weight		load;
+	u64				vruntime;
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	__cacheline_group_end_aligned(hot);
+#endif
 	struct rb_node			run_node;
 	u64				deadline;
 	u64				min_vruntime;
 	u64				min_slice;
 
 	struct list_head		group_node;
-	unsigned char			on_rq;
-	unsigned char			sched_delayed;
-	unsigned char			rel_deadline;
-	unsigned char			custom_slice;
-					/* hole */
 
 	u64				exec_start;
 	u64				sum_exec_runtime;
 	u64				prev_sum_exec_runtime;
-	u64				vruntime;
 	s64				vlag;
 	u64				slice;
 
 	u64				nr_migrations;
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-	int				depth;
-	struct sched_entity		*parent;
-	/* rq on which this entity is (to be) queued: */
-	struct cfs_rq			*cfs_rq;
-	/* rq "owned" by this entity/group: */
-	struct cfs_rq			*my_q;
-	/* cached value of my_q->h_nr_running */
-	unsigned long			runnable_weight;
-#endif
-
 #ifdef CONFIG_SMP
 	/*
 	 * Per entity load average tracking.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84ee289d98d7..58bcd7d55eca 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8474,6 +8474,7 @@ static struct kmem_cache *task_group_cache __ro_after_init;
 #endif
 
 static void __init cfs_rq_struct_check(void);
+static void __init sched_entity_struct_check(void);
 
 void __init sched_init(void)
 {
@@ -8492,6 +8493,7 @@ void __init sched_init(void)
 	BUG_ON(!sched_class_above(&ext_sched_class, &idle_sched_class));
 #endif
 	cfs_rq_struct_check();
+	sched_entity_struct_check();
 	wait_bit_init();
=20
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -10755,3 +10757,21 @@ static void __init cfs_rq_struct_check(void)
 #endif
 #endif
 }
+
+static void __init sched_entity_struct_check(void)
+{
+	/*
+	 * The compile time check is only enabled with CONFIG_FAIR_GROUP_SCHED.
+	 * We care about the placement of the hottest fields below.
+	 */
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, parent);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, cfs_rq);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, my_q);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot,
+				      runnable_weight);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, on_rq);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, load);
+	CACHELINE_ASSERT_GROUP_MEMBER(struct sched_entity, hot, vruntime);
+#endif
+}
-- 
2.49.0