From: Christian Loehle <christian.loehle@arm.com>
To: linux-kernel@vger.kernel.org, peterz@infradead.org, mingo@redhat.com
Cc: juri.lelli@redhat.com, dietmar.eggemann@arm.com, kprateek.nayak@amd.com,
    pierre.gondois@arm.com, vincent.guittot@linaro.org,
    Christian Loehle <christian.loehle@arm.com>
Subject: [PATCH] sched/fair: Skip SCHED_IDLE rq for SCHED_IDLE task
Date: Mon, 2 Feb 2026 15:32:02 +0000
Message-Id: <20260202153202.1295391-1-christian.loehle@arm.com>
X-Mailer: git-send-email 2.34.1

CPUs whose rq only has SCHED_IDLE tasks running are preferred over truly
idle CPUs in many cases, because they are guaranteed not to be in an
idle state and might even be running at a higher P-state. This reasoning
rests on the assumption that the task (e.g. the wakee) gets to run there
immediately and isn't sharing the rq. That assumption doesn't hold if
the task has SCHED_IDLE policy itself; in that case we are better off
continuing to look for a truly idle CPU.
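For context (not part of the patch): a task gets SCHED_IDLE policy via
sched_setscheduler(2) or sched_setattr(2), or from the shell via chrt(1).
A minimal userspace sketch; the helper name make_self_sched_idle() is
made up for illustration:

#define _GNU_SOURCE		/* SCHED_IDLE in <sched.h> */
#include <sched.h>
#include <stdio.h>

/*
 * Illustrative helper, not from this patch: move the calling thread to
 * SCHED_IDLE so it only competes for CPU time that would otherwise go
 * idle. sched_priority must be 0 for SCHED_IDLE.
 */
static int make_self_sched_idle(void)
{
	const struct sched_param param = { .sched_priority = 0 };

	if (sched_setscheduler(0, SCHED_IDLE, &param) == -1) {
		perror("sched_setscheduler(SCHED_IDLE)");
		return -1;
	}
	return 0;
}

The benchmark below presumably starts its compile jobs this way, or
equivalently with something like "chrt --idle 0 make -jN".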
On a 2-socket Intel Xeon with 64 logical cores in total, this yields the
following for kernel compilation running under SCHED_IDLE:

+---------+----------------------+----------------------+--------+
| workers | mainline (seconds)   | patch (seconds)      | delta% |
+=========+======================+======================+========+
| 1       | 4384.728 ± 21.085    | 3843.250 ± 16.235    | -12.35 |
| 2       | 2242.513 ± 2.099     | 1971.696 ± 2.842     | -12.08 |
| 4       | 1199.324 ± 1.823     | 1033.744 ± 1.803     | -13.81 |
| 8       | 649.083 ± 1.959      | 559.123 ± 4.301      | -13.86 |
| 16      | 370.425 ± 0.915      | 325.906 ± 4.623      | -12.02 |
| 32      | 234.651 ± 2.255      | 217.266 ± 0.253      | -7.41  |
| 64      | 202.286 ± 1.452      | 197.977 ± 2.275      | -2.13  |
| 128     | 217.092 ± 1.687      | 212.164 ± 1.138      | -2.27  |
+---------+----------------------+----------------------+--------+

Signed-off-by: Christian Loehle <christian.loehle@arm.com>
---
 kernel/sched/fair.c | 28 +++++++++++++++-------------
 1 file changed, 15 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3eaeceda71b0..b29fa04958f0 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6832,9 +6832,10 @@ static int sched_idle_rq(struct rq *rq)
 			rq->nr_running);
 }
 
-static int sched_idle_cpu(int cpu)
+static int choose_idle_cpu(int cpu, struct task_struct *p)
 {
-	return sched_idle_rq(cpu_rq(cpu));
+	return available_idle_cpu(cpu) ||
+	       (sched_idle_rq(cpu_rq(cpu)) && !task_has_idle_policy(p));
 }
 
 static void
@@ -7400,7 +7401,7 @@ sched_balance_find_dst_group_cpu(struct sched_group *group, struct task_struct *
 		if (!sched_core_cookie_match(rq, p))
 			continue;
 
-		if (sched_idle_cpu(i))
+		if (sched_idle_rq(rq) && !task_has_idle_policy(p))
 			return i;
 
 		if (available_idle_cpu(i)) {
@@ -7491,8 +7492,7 @@ static inline int sched_balance_find_dst_cpu(struct sched_domain *sd, struct tas
 
 static inline int __select_idle_cpu(int cpu, struct task_struct *p)
 {
-	if ((available_idle_cpu(cpu) || sched_idle_cpu(cpu)) &&
-	    sched_cpu_cookie_match(cpu_rq(cpu), p))
+	if (choose_idle_cpu(cpu, p) && sched_cpu_cookie_match(cpu_rq(cpu), p))
 		return cpu;
 
 	return -1;
@@ -7565,7 +7565,9 @@ static int select_idle_core(struct task_struct *p, int core, struct cpumask *cpu
 		if (!available_idle_cpu(cpu)) {
 			idle = false;
 			if (*idle_cpu == -1) {
-				if (sched_idle_cpu(cpu) && cpumask_test_cpu(cpu, cpus)) {
+				if (sched_idle_rq(cpu_rq(cpu)) &&
+				    !task_has_idle_policy(p) &&
+				    cpumask_test_cpu(cpu, cpus)) {
 					*idle_cpu = cpu;
 					break;
 				}
@@ -7600,7 +7602,7 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t
 		 */
 		if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
 			continue;
-		if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
+		if (choose_idle_cpu(cpu, p))
 			return cpu;
 	}
 
@@ -7722,7 +7724,7 @@ select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
 	for_each_cpu_wrap(cpu, cpus, target) {
 		unsigned long cpu_cap = capacity_of(cpu);
 
-		if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
+		if (!choose_idle_cpu(cpu, p))
 			continue;
 
 		fits = util_fits_cpu(task_util, util_min, util_max, cpu);
@@ -7793,7 +7795,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	 */
 	lockdep_assert_irqs_disabled();
 
-	if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
+	if (choose_idle_cpu(target, p) &&
 	    asym_fits_cpu(task_util, util_min, util_max, target))
 		return target;
 
@@ -7801,7 +7803,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	 * If the previous CPU is cache affine and idle, don't be stupid:
 	 */
 	if (prev != target && cpus_share_cache(prev, target) &&
-	    (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
+	    choose_idle_cpu(prev, p) &&
 	    asym_fits_cpu(task_util, util_min, util_max, prev)) {
 
 		if (!static_branch_unlikely(&sched_cluster_active) ||
@@ -7833,7 +7835,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
 	if (recent_used_cpu != prev &&
 	    recent_used_cpu != target &&
 	    cpus_share_cache(recent_used_cpu, target) &&
-	    (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
+	    choose_idle_cpu(recent_used_cpu, p) &&
 	    cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) &&
 	    asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
 
@@ -12261,7 +12263,7 @@ static void sched_balance_domains(struct rq *rq, enum cpu_idle_type idle)
 {
 	int continue_balancing = 1;
 	int cpu = rq->cpu;
-	int busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
+	int busy = idle != CPU_IDLE && !sched_idle_rq(rq);
 	unsigned long interval;
 	struct sched_domain *sd;
 	/* Earliest time when we have to do rebalance again */
@@ -12299,7 +12301,7 @@ static void sched_balance_domains(struct rq *rq, enum cpu_idle_type idle)
 				 * state even if we migrated tasks. Update it.
 				 */
 				idle = idle_cpu(cpu);
-				busy = !idle && !sched_idle_cpu(cpu);
+				busy = !idle && !sched_idle_rq(rq);
 			}
 			sd->last_balance = jiffies;
 			interval = get_sd_balance_interval(sd, busy);
-- 
2.34.1