For PREEMPT_RT kernels, scx_bypass_lb_timerfn() runs in the preemptible
per-CPU ktimer kthread context, which means the following scenario can
occur (on x86 platforms):
cpu1                                              cpu2
ktimer kthread:
->scx_bypass_lb_timerfn
  ->bypass_lb_node
    ->for_each_cpu(cpu, resched_mask)

migration/1: by preempt                           by migration/2:
multi_cpu_stop()                                  multi_cpu_stop()
->take_cpu_down()
  ->__cpu_disable()
    ->set cpu1 offline

->rq1 = cpu_rq(cpu1)
->resched_curr(rq1)
  ->smp_send_reschedule(cpu1)
    ->native_smp_send_reschedule(cpu1)
      ->if (unlikely(cpu_is_offline(cpu))) {
                WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu);
                return;
        }
This commit therefore uses resched_cpu() instead of resched_curr() in
bypass_lb_node() to avoid sending IPIs to offline CPUs.
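
For reference, resched_cpu() checks that the target CPU is online before
rescheduling it. A minimal sketch of its behavior, paraphrased from
kernel/sched/core.c (the exact locking helpers differ between kernel
versions):

	void resched_cpu(int cpu)
	{
		struct rq *rq = cpu_rq(cpu);
		unsigned long flags;

		/* take the rq lock and skip CPUs that have gone offline */
		raw_spin_rq_lock_irqsave(rq, flags);
		if (cpu_online(cpu) || cpu == smp_processor_id())
			resched_curr(rq);
		raw_spin_rq_unlock_irqrestore(rq, flags);
	}

Since resched_cpu() takes the rq lock and performs the offline check
itself, the caller no longer needs to lock the rq around the resched.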
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
---
kernel/sched/ext.c | 9 ++-------
1 file changed, 2 insertions(+), 7 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 5ebf8a740847..8f6d8d7f895c 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3956,13 +3956,8 @@ static void bypass_lb_node(struct scx_sched *sch, int node)
nr_donor_target, nr_target);
}
- for_each_cpu(cpu, resched_mask) {
- struct rq *rq = cpu_rq(cpu);
-
- raw_spin_rq_lock_irq(rq);
- resched_curr(rq);
- raw_spin_rq_unlock_irq(rq);
- }
+ for_each_cpu(cpu, resched_mask)
+ resched_cpu(cpu);
for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
u32 nr = READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr);
--
2.17.1
On Mon, Dec 22, 2025 at 07:53:17PM +0800, Zqiang wrote:
> For PREEMPT_RT kernels, scx_bypass_lb_timerfn() runs in the preemptible
> per-CPU ktimer kthread context, which means the following scenario can
> occur (on x86 platforms):
>
> cpu1                                              cpu2
> ktimer kthread:
> ->scx_bypass_lb_timerfn
>   ->bypass_lb_node
>     ->for_each_cpu(cpu, resched_mask)
>
> migration/1: by preempt                           by migration/2:
> multi_cpu_stop()                                  multi_cpu_stop()
> ->take_cpu_down()
>   ->__cpu_disable()
>     ->set cpu1 offline
>
> ->rq1 = cpu_rq(cpu1)
> ->resched_curr(rq1)
>   ->smp_send_reschedule(cpu1)
>     ->native_smp_send_reschedule(cpu1)
>       ->if (unlikely(cpu_is_offline(cpu))) {
>                 WARN(1, "sched: Unexpected reschedule of offline CPU#%d!\n", cpu);
>                 return;
>         }
>
> This commit therefore uses resched_cpu() instead of resched_curr() in
> bypass_lb_node() to avoid sending IPIs to offline CPUs.
>
> Signed-off-by: Zqiang <qiang.zhang@linux.dev>
Good catch, resched_cpu() checks that the CPU is online, so this makes sense to me.
Reviewed-by: Andrea Righi <arighi@nvidia.com>
Thanks,
-Andrea
> ---
> kernel/sched/ext.c | 9 ++-------
> 1 file changed, 2 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 5ebf8a740847..8f6d8d7f895c 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -3956,13 +3956,8 @@ static void bypass_lb_node(struct scx_sched *sch, int node)
> nr_donor_target, nr_target);
> }
>
> - for_each_cpu(cpu, resched_mask) {
> - struct rq *rq = cpu_rq(cpu);
> -
> - raw_spin_rq_lock_irq(rq);
> - resched_curr(rq);
> - raw_spin_rq_unlock_irq(rq);
> - }
> + for_each_cpu(cpu, resched_mask)
> + resched_cpu(cpu);
>
> for_each_cpu_and(cpu, cpu_online_mask, node_mask) {
> u32 nr = READ_ONCE(cpu_rq(cpu)->scx.bypass_dsq.nr);
> --
> 2.17.1
>
Applied to sched_ext/for-6.19-fixes.

Thanks.

--
tejun
This commit only calls irq_work_queue() when llist_add() returns true.
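
As background: llist_add() returns true only when the node was added to an
empty list, i.e. no earlier add has kicked the irq_work yet; a single kick
is enough because the irq_work handler drains the whole list with
llist_del_all(). Below is a minimal, self-contained sketch of this pattern
using hypothetical names (pending, drain_fn, drain_work, queue_for_free);
the actual code uses dsqs_to_free and free_dsq_irq_work, as in the diff
below:

	#include <linux/irq_work.h>
	#include <linux/llist.h>

	static LLIST_HEAD(pending);

	static void drain_fn(struct irq_work *work)
	{
		struct llist_node *node, *next;

		/* detach everything queued since the last kick and walk it */
		for (node = llist_del_all(&pending); node; node = next) {
			next = node->next;
			/* free/process the entry containing @node here */
		}
	}

	static DEFINE_IRQ_WORK(drain_work, drain_fn);

	static void queue_for_free(struct llist_node *node)
	{
		/* true means the list was empty, so no kick is pending yet */
		if (llist_add(node, &pending))
			irq_work_queue(&drain_work);
	}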
Signed-off-by: Zqiang <qiang.zhang@linux.dev>
---
kernel/sched/ext.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 8f6d8d7f895c..136b01950a62 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3439,8 +3439,8 @@ static void destroy_dsq(struct scx_sched *sch, u64 dsq_id)
* operations inside scheduler locks.
*/
dsq->id = SCX_DSQ_INVALID;
- llist_add(&dsq->free_node, &dsqs_to_free);
- irq_work_queue(&free_dsq_irq_work);
+ if (llist_add(&dsq->free_node, &dsqs_to_free))
+ irq_work_queue(&free_dsq_irq_work);
out_unlock_dsq:
raw_spin_unlock_irqrestore(&dsq->lock, flags);
--
2.17.1
On Mon, Dec 22, 2025 at 07:53:18PM +0800, Zqiang wrote:
> This commit only calls irq_work_queue() when llist_add() returns
> true.

Just to be more clear, we could rephrase the commit message as follows:

llist_add() returns true only when adding to an empty list, which indicates
that no IRQ work is currently queued or running. Therefore, we only need to
call irq_work_queue() when llist_add() returns true, to avoid unnecessarily
re-queueing IRQ work that is already pending or executing.

>
> Signed-off-by: Zqiang <qiang.zhang@linux.dev>

But overall, looks good to me.

Reviewed-by: Andrea Righi <arighi@nvidia.com>

Thanks,
-Andrea

> ---
>  kernel/sched/ext.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> index 8f6d8d7f895c..136b01950a62 100644
> --- a/kernel/sched/ext.c
> +++ b/kernel/sched/ext.c
> @@ -3439,8 +3439,8 @@ static void destroy_dsq(struct scx_sched *sch, u64 dsq_id)
>   * operations inside scheduler locks.
>   */
>  dsq->id = SCX_DSQ_INVALID;
> - llist_add(&dsq->free_node, &dsqs_to_free);
> - irq_work_queue(&free_dsq_irq_work);
> + if (llist_add(&dsq->free_node, &dsqs_to_free))
> +         irq_work_queue(&free_dsq_irq_work);
>
>  out_unlock_dsq:
>  raw_spin_unlock_irqrestore(&dsq->lock, flags);
> --
> 2.17.1
>
> On Mon, Dec 22, 2025 at 07:53:18PM +0800, Zqiang wrote:
> > This commit only calls irq_work_queue() when llist_add() returns
> > true.
>
> Just to be more clear, we could rephrase the commit message as follows:
>
> llist_add() returns true only when adding to an empty list, which indicates
> that no IRQ work is currently queued or running. Therefore, we only need to
> call irq_work_queue() when llist_add() returns true, to avoid unnecessarily
> re-queueing IRQ work that is already pending or executing.

Thank you for making the commit message clearer, and for the review :).

Thanks
Zqiang

> >
> > Signed-off-by: Zqiang <qiang.zhang@linux.dev>
>
> But overall, looks good to me.
>
> Reviewed-by: Andrea Righi <arighi@nvidia.com>
>
> Thanks,
> -Andrea
>
> > ---
> >  kernel/sched/ext.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
> > index 8f6d8d7f895c..136b01950a62 100644
> > --- a/kernel/sched/ext.c
> > +++ b/kernel/sched/ext.c
> > @@ -3439,8 +3439,8 @@ static void destroy_dsq(struct scx_sched *sch, u64 dsq_id)
> >   * operations inside scheduler locks.
> >   */
> >  dsq->id = SCX_DSQ_INVALID;
> > - llist_add(&dsq->free_node, &dsqs_to_free);
> > - irq_work_queue(&free_dsq_irq_work);
> > + if (llist_add(&dsq->free_node, &dsqs_to_free))
> > +         irq_work_queue(&free_dsq_irq_work);
> >
> >  out_unlock_dsq:
> >  raw_spin_unlock_irqrestore(&dsq->lock, flags);
> > --
> > 2.17.1
> >
Applied to sched_ext/for-6.20.

Note: Commit message updated per Andrea's suggestion.

Thanks.

--
tejun