From nobody Mon Apr 6 16:12:46 2026 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7B3E1C4332F for ; Mon, 3 Oct 2022 03:24:36 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S229540AbiJCDTr (ORCPT ); Sun, 2 Oct 2022 23:19:47 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:33494 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229530AbiJCDTp (ORCPT ); Sun, 2 Oct 2022 23:19:45 -0400 Received: from out30-130.freemail.mail.aliyun.com (out30-130.freemail.mail.aliyun.com [115.124.30.130]) by lindbergh.monkeyblade.net (Postfix) with ESMTPS id E6BE2632B for ; Sun, 2 Oct 2022 20:19:41 -0700 (PDT) X-Alimail-AntiSpam: AC=PASS;BC=-1|-1;BR=01201311R631e4;CH=green;DM=||false|;DS=||;FP=0|-1|-1|-1|0|-1|-1|-1;HT=ay29a033018045176;MF=cruzzhao@linux.alibaba.com;NM=1;PH=DS;RN=11;SR=0;TI=SMTPD_---0VR8plep_1664767168; Received: from rt2b04371.sqa.tbc.tbsite.net(mailfrom:CruzZhao@linux.alibaba.com fp:SMTPD_---0VR8plep_1664767168) by smtp.aliyun-inc.com; Mon, 03 Oct 2022 11:19:38 +0800 From: Cruz Zhao To: mingo@redhat.com, peterz@infradead.org, juri.lelli@redhat.com, vincent.guittot@linaro.org, dietmar.eggemann@arm.com, rostedt@goodmis.org, bsegall@google.com, mgorman@suse.de, bristot@redhat.com, vschneid@redhat.com Cc: linux-kernel@vger.kernel.org Subject: [PATCH v2] sched/core: Optimize the process of picking the max prio task for the core Date: Mon, 3 Oct 2022 11:19:28 +0800 Message-Id: <1664767168-30029-1-git-send-email-CruzZhao@linux.alibaba.com> X-Mailer: git-send-email 1.8.3.1 Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Transfer-Encoding: quoted-printable MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" When we pick the max prio task for the core in the case of 
sched_core_enabled(), if there's a task with a higher prio sched class in the runqueue of one SMT sibling, it's not necessary for the other SMT siblings to traverse lower prio sched classes. So we can change the traversal order: for each sched class, if any runqueue in the core has a task of that class, pick the max prio task among them and break the loop. To compare the prio of tasks within the same sched class, we introduce sched_class::prio_less(). Signed-off-by: Cruz Zhao Reported-by: kernel test robot --- Changes since v1: Fixes the compile error. --- kernel/sched/core.c | 33 ++++++++++++++++++++++----------- kernel/sched/deadline.c | 11 +++++++++++ kernel/sched/fair.c | 5 ++++- kernel/sched/idle.c | 10 ++++++++++ kernel/sched/rt.c | 10 ++++++++++ kernel/sched/sched.h | 5 +++-- kernel/sched/stop_task.c | 10 ++++++++++ 7 files changed, 70 insertions(+), 14 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a2d8a1f..50e28c8 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -189,7 +189,7 @@ static inline bool prio_less(struct task_struct *a, str= uct task_struct *b, bool return !dl_time_before(a->dl.deadline, b->dl.deadline); =20 if (pa =3D=3D MAX_RT_PRIO + MAX_NICE) /* fair */ - return cfs_prio_less(a, b, in_fi); + return fair_sched_class.prio_less(a, b, in_fi); =20 return false; } @@ -5886,6 +5886,7 @@ static inline struct task_struct *pick_task(struct rq= *rq) int i, cpu, occ =3D 0; struct rq *rq_i; bool need_sync; + struct sched_class *class; =20 if (!sched_core_enabled(rq)) return __pick_next_task(rq, prev, rf); @@ -5978,12 +5979,6 @@ static inline struct task_struct *pick_task(struct r= q *rq) } } =20 - /* - * For each thread: do the regular task pick and find the max prio task - * amongst them. 
- * - * Tie-break prio towards the current CPU - */ for_each_cpu_wrap(i, smt_mask, cpu) { rq_i =3D cpu_rq(i); =20 @@ -5994,12 +5989,28 @@ static inline struct task_struct *pick_task(struct = rq *rq) */ if (i !=3D cpu && (rq_i !=3D rq->core || !core_clock_updated)) update_rq_clock(rq_i); + } =20 - p =3D rq_i->core_pick =3D pick_task(rq_i); - if (!max || prio_less(max, p, fi_before)) - max =3D p; + /* + * For each thread: do the regular task pick and find the max prio task + * amongst them. + * + * Tie-break prio towards the current CPU + */ + for_each_class(class) { + for_each_cpu_wrap(i, smt_mask, cpu) { + rq_i =3D cpu_rq(i); + p =3D rq_i->core_pick =3D class->pick_task(rq_i); + if (!max || (p && class->prio_less(max, p, fi_before))) + max =3D p; + } + if (max) + break; } =20 + if (!max) + BUG(); + cookie =3D rq->core->core_cookie =3D max->core_cookie; =20 /* @@ -6010,7 +6021,7 @@ static inline struct task_struct *pick_task(struct rq= *rq) rq_i =3D cpu_rq(i); p =3D rq_i->core_pick; =20 - if (!cookie_equals(p, cookie)) { + if (!p || !cookie_equals(p, cookie)) { p =3D NULL; if (cookie) p =3D sched_core_find(rq_i, cookie); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 86dea6a..8e7aa7d 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1942,6 +1942,7 @@ static int balance_dl(struct rq *rq, struct task_stru= ct *p, struct rq_flags *rf) =20 return sched_stop_runnable(rq) || sched_dl_runnable(rq); } + #endif /* CONFIG_SMP */ =20 /* @@ -2054,6 +2055,13 @@ static void put_prev_task_dl(struct rq *rq, struct t= ask_struct *p) enqueue_pushable_dl_task(rq, p); } =20 +#ifdef CONFIG_SCHED_CORE +static bool prio_less_dl(struct task_struct *a, struct task_struct *b, boo= l in_fi) +{ + return !dl_time_before(a->dl.deadline, b->dl.deadline); +} +#endif + /* * scheduler tick hitting a task of our scheduling class. 
* @@ -2704,6 +2712,9 @@ static void prio_changed_dl(struct rq *rq, struct tas= k_struct *p, .pick_next_task =3D pick_next_task_dl, .put_prev_task =3D put_prev_task_dl, .set_next_task =3D set_next_task_dl, +#ifdef CONFIG_SCHED_CORE + .prio_less =3D prio_less_dl, +#endif =20 #ifdef CONFIG_SMP .balance =3D balance_dl, diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index e4a0b8b..eca2636 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -11516,7 +11516,7 @@ void task_vruntime_update(struct rq *rq, struct tas= k_struct *p, bool in_fi) se_fi_update(se, rq->core->core_forceidle_seq, in_fi); } =20 -bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool in_f= i) +static bool prio_less_fair(struct task_struct *a, struct task_struct *b, b= ool in_fi) { struct rq *rq =3D task_rq(a); struct sched_entity *sea =3D &a->se; @@ -12159,6 +12159,9 @@ static unsigned int get_rr_interval_fair(struct rq = *rq, struct task_struct *task .pick_next_task =3D __pick_next_task_fair, .put_prev_task =3D put_prev_task_fair, .set_next_task =3D set_next_task_fair, +#ifdef CONFIG_SCHED_CORE + .prio_less =3D prio_less_fair, +#endif =20 #ifdef CONFIG_SMP .balance =3D balance_fair, diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index f26ab26..a3c3e37 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -436,6 +436,13 @@ static void set_next_task_idle(struct rq *rq, struct t= ask_struct *next, bool fir schedstat_inc(rq->sched_goidle); } =20 +#ifdef CONFIG_SCHED_CORE +static bool prio_less_idle(struct task_struct *a, struct task_struct *b, b= ool in_fi) +{ + return false; +} +#endif + #ifdef CONFIG_SMP static struct task_struct *pick_task_idle(struct rq *rq) { @@ -507,6 +514,9 @@ static void update_curr_idle(struct rq *rq) .pick_next_task =3D pick_next_task_idle, .put_prev_task =3D put_prev_task_idle, .set_next_task =3D set_next_task_idle, +#ifdef CONFIG_SCHED_CORE + .prio_less =3D prio_less_idle, +#endif =20 #ifdef CONFIG_SMP .balance =3D balance_idle, 
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index d869bcf..4459a90 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1838,6 +1838,13 @@ static void put_prev_task_rt(struct rq *rq, struct t= ask_struct *p) enqueue_pushable_task(rq, p); } =20 +static bool prio_less_rt(struct task_struct *a, struct task_struct *b, boo= l in_fi) +{ + int pa =3D rt_prio(a->prio), pb =3D rt_prio(b->prio); + + return -pa < -pb; +} + #ifdef CONFIG_SMP =20 /* Only try algorithms three times */ @@ -2685,6 +2692,9 @@ static unsigned int get_rr_interval_rt(struct rq *rq,= struct task_struct *task) .pick_next_task =3D pick_next_task_rt, .put_prev_task =3D put_prev_task_rt, .set_next_task =3D set_next_task_rt, +#ifdef CONFIG_SCHED_CORE + .prio_less =3D prio_less_rt, +#endif =20 #ifdef CONFIG_SMP .balance =3D balance_rt, diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1644242..a7c6d10 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1218,8 +1218,6 @@ static inline raw_spinlock_t *__rq_lockp(struct rq *r= q) return &rq->__lock; } =20 -bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi); - /* * Helpers to check if the CPU's core cookie matches with the task's cookie * when core scheduling is enabled. 
@@ -2155,6 +2153,9 @@ struct sched_class { =20 void (*put_prev_task)(struct rq *rq, struct task_struct *p); void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first); +#ifdef CONFIG_SCHED_CORE + bool (*prio_less)(struct task_struct *a, struct task_struct *b, bool in_f= i); +#endif =20 #ifdef CONFIG_SMP int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *= rf); diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index 8559059..c9ddaaa 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -84,6 +84,13 @@ static void put_prev_task_stop(struct rq *rq, struct tas= k_struct *prev) update_current_exec_runtime(curr, now, delta_exec); } =20 +#ifdef CONFIG_SCHED_CORE +static bool prio_less_stop(struct task_struct *a, struct task_struct *b, b= ool in_fi) +{ + return false; +} +#endif + /* * scheduler tick hitting a task of our scheduling class. * @@ -125,6 +132,9 @@ static void update_curr_stop(struct rq *rq) .pick_next_task =3D pick_next_task_stop, .put_prev_task =3D put_prev_task_stop, .set_next_task =3D set_next_task_stop, +#ifdef CONFIG_SCHED_CORE + .prio_less =3D prio_less_stop, +#endif =20 #ifdef CONFIG_SMP .balance =3D balance_stop, --=20 1.8.3.1