The following commit has been merged into the sched/urgent branch of tip:
Commit-ID: 077e1e2e0015e5ba6538d1c5299fb299a3a92d60
Gitweb: https://git.kernel.org/tip/077e1e2e0015e5ba6538d1c5299fb299a3a92d60
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Tue, 16 Sep 2025 23:02:41 +02:00
Committer: Peter Zijlstra <peterz@infradead.org>
CommitterDate: Thu, 18 Sep 2025 08:50:05 +02:00
sched/deadline: Fix dl_server getting stuck
John found it was easy to hit lockup warnings when running locktorture
on a 2 CPU VM, which he bisected down to: commit cccb45d7c429
("sched/deadline: Less agressive dl_server handling").
While debugging it seems there is a window where we can end up with the
dl_server dequeued while dl_se->dl_server_active is still set. This
causes dl_server_start() to return without enqueueing the dl_server, so
it fails to run when RT tasks starve the CPU.

When this happens, dl_server_timer() catches the
'!dl_se->server_has_tasks(dl_se)' case, which then calls
replenish_dl_entity() and dl_server_stopped() and finally returns
HRTIMER_NORESTART.

This ends with no new timer armed and no enqueue either, leaving the
dl_server 'dead' and allowing starvation.

What should have happened is for the bandwidth timer to start the
zero-laxity timer, which in turn would enqueue the dl_server and cause
dl_se->server_pick_task() to be called -- which will stop the
dl_server if no fair tasks are observed for a whole period.

IOW, it is totally irrelevant whether there are fair tasks at the moment
of the bandwidth refresh.

This removes all dl_se->server_has_tasks() users, so remove the whole
thing.
Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
Reported-by: John Stultz <jstultz@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: John Stultz <jstultz@google.com>
---
include/linux/sched.h | 1 -
kernel/sched/deadline.c | 12 +-----------
kernel/sched/fair.c | 7 +------
kernel/sched/sched.h | 4 ----
4 files changed, 2 insertions(+), 22 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f8188b8..f89313b 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -733,7 +733,6 @@ struct sched_dl_entity {
* runnable task.
*/
struct rq *rq;
- dl_server_has_tasks_f server_has_tasks;
dl_server_pick_f server_pick_task;
#ifdef CONFIG_RT_MUTEXES
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index f253012..5a5080b 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -875,7 +875,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
*/
if (dl_se->dl_defer && !dl_se->dl_defer_running &&
dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
- if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
+ if (!is_dl_boosted(dl_se)) {
/*
* Set dl_se->dl_defer_armed and dl_throttled variables to
@@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
/* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
-static bool dl_server_stopped(struct sched_dl_entity *dl_se);
-
static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
{
struct rq *rq = rq_of_dl_se(dl_se);
@@ -1171,12 +1169,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
if (!dl_se->dl_runtime)
return HRTIMER_NORESTART;
- if (!dl_se->server_has_tasks(dl_se)) {
- replenish_dl_entity(dl_se);
- dl_server_stopped(dl_se);
- return HRTIMER_NORESTART;
- }
-
if (dl_se->dl_defer_armed) {
/*
* First check if the server could consume runtime in background.
@@ -1625,11 +1617,9 @@ static bool dl_server_stopped(struct sched_dl_entity *dl_se)
}
void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
- dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_task)
{
dl_se->rq = rq;
- dl_se->server_has_tasks = has_tasks;
dl_se->server_pick_task = pick_task;
}
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c4d91e8..59d7dc9 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8859,11 +8859,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
return pick_next_task_fair(rq, prev, NULL);
}
-static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
-{
- return !!dl_se->rq->cfs.nr_queued;
-}
-
static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
{
return pick_task_fair(dl_se->rq);
@@ -8875,7 +8870,7 @@ void fair_server_init(struct rq *rq)
init_dl_entity(dl_se);
- dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task);
+ dl_server_init(dl_se, rq, fair_server_pick_task);
}
/*
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index be9745d..f10d627 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -365,9 +365,6 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
*
* dl_se::rq -- runqueue we belong to.
*
- * dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
- * server when it runs out of tasks to run.
- *
* dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
* returns NULL.
*
@@ -383,7 +380,6 @@ extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
extern void dl_server_start(struct sched_dl_entity *dl_se);
extern void dl_server_stop(struct sched_dl_entity *dl_se);
extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
- dl_server_has_tasks_f has_tasks,
dl_server_pick_f pick_task);
extern void sched_init_dl_servers(void);
On 18.09.2025 08:56, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the sched/urgent branch of tip:
>
> Commit-ID: 077e1e2e0015e5ba6538d1c5299fb299a3a92d60
> Gitweb: https://git.kernel.org/tip/077e1e2e0015e5ba6538d1c5299fb299a3a92d60
> Author: Peter Zijlstra <peterz@infradead.org>
> AuthorDate: Tue, 16 Sep 2025 23:02:41 +02:00
> Committer: Peter Zijlstra <peterz@infradead.org>
> CommitterDate: Thu, 18 Sep 2025 08:50:05 +02:00
>
> sched/deadline: Fix dl_server getting stuck
>
> John found it was easy to hit lockup warnings when running locktorture
> on a 2 CPU VM, which he bisected down to: commit cccb45d7c429
> ("sched/deadline: Less agressive dl_server handling").
>
> While debugging it seems there is a chance where we end up with the
> dl_server dequeued, with dl_se->dl_server_active. This causes
> dl_server_start() to return without enqueueing the dl_server, thus it
> fails to run when RT tasks starve the cpu.
>
> When this happens, dl_server_timer() catches the
> '!dl_se->server_has_tasks(dl_se)' case, which then calls
> replenish_dl_entity() and dl_server_stopped() and finally return
> HRTIMER_NO_RESTART.
>
> This ends in no new timer and also no enqueue, leaving the dl_server
> 'dead', allowing starvation.
>
> What should have happened is for the bandwidth timer to start the
> zero-laxity timer, which in turn would enqueue the dl_server and cause
> dl_se->server_pick_task() to be called -- which will stop the
> dl_server if no fair tasks are observed for a whole period.
>
> IOW, it is totally irrelevant if there are fair tasks at the moment of
> bandwidth refresh.
>
> This removes all dl_se->server_has_tasks() users, so remove the whole
> thing.
>
> Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
> Reported-by: John Stultz <jstultz@google.com>
> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> Tested-by: John Stultz <jstultz@google.com>
> ---
This patch landed in today's linux-next as commit 077e1e2e0015
("sched/deadline: Fix dl_server getting stuck"). In my tests I found
that it breaks CPU hotplug on some of my systems. On a 64-bit
Exynos5433-based TM2e board I've captured the following lockdep warning
(which unfortunately doesn't really look related to CPU hotplug):
# for i in /sys/devices/system/cpu/cpu[1-9]; do echo 0 >$i/online; done
Detected VIPT I-cache on CPU7
CPU7: Booted secondary processor 0x0000000101 [0x410fd031]
------------[ cut here ]------------
WARNING: CPU: 7 PID: 0 at kernel/rcu/tree.c:4329
rcutree_report_cpu_starting+0x1e8/0x348
Modules linked in: brcmfmac_wcc cpufreq_powersave cpufreq_conservative
brcmfmac brcmutil sha256 snd_soc_wm5110 cfg80211 snd_soc_wm_adsp cs_dsp
snd_soc_tm2_wm5110 snd_soc_arizona arizona_micsupp phy_exynos5_usbdrd
s5p_mfc typec arizona_ldo1 hci_uart btqca s5p_jpeg max77693_haptic btbcm
s3fwrn5_i2c exynos_gsc bluetooth s3fwrn5 nci v4l2_mem2mem nfc
snd_soc_i2s snd_soc_idma snd_soc_hdmi_codec snd_soc_max98504
snd_soc_s3c_dma videobuf2_dma_contig videobuf2_memops ecdh_generic
snd_soc_core ir_spi videobuf2_v4l2 ecc snd_compress ntc_thermistor
panfrost videodev snd_pcm_dmaengine snd_pcm rfkill drm_shmem_helper
panel_samsung_s6e3ha2 videobuf2_common backlight pwrseq_core gpu_sched
mc snd_timer snd soundcore ipv6
CPU: 7 UID: 0 PID: 0 Comm: swapper/7 Not tainted 6.17.0-rc6+ #16012 PREEMPT
Hardware name: Samsung TM2E board (DT)
Hardware name: Samsung TM2E board (DT)
Detected VIPT I-cache on CPU7
======================================================
WARNING: possible circular locking dependency detected
6.17.0-rc6+ #16012 Not tainted
------------------------------------------------------
swapper/7/0 is trying to acquire lock:
ffff000024021cc8 (&irq_desc_lock_class){-.-.}-{2:2}, at:
__irq_get_desc_lock+0x5c/0x9c
but task is already holding lock:
ffff800083e479c0 (&port_lock_key){-.-.}-{3:3}, at:
s3c24xx_serial_console_write+0x80/0x268
which lock already depends on the new lock.
the existing dependency chain (in reverse order) is:
-> #2 (&port_lock_key){-.-.}-{3:3}:
_raw_spin_lock_irqsave+0x60/0x88
s3c24xx_serial_console_write+0x80/0x268
console_flush_all+0x304/0x49c
console_unlock+0x70/0x110
vprintk_emit+0x254/0x39c
vprintk_default+0x38/0x44
vprintk+0x28/0x34
_printk+0x5c/0x84
register_console+0x3ac/0x4f8
serial_core_register_port+0x6c4/0x7a4
serial_ctrl_register_port+0x10/0x1c
uart_add_one_port+0x10/0x1c
s3c24xx_serial_probe+0x34c/0x6d8
platform_probe+0x5c/0xac
really_probe+0xbc/0x298
__driver_probe_device+0x78/0x12c
driver_probe_device+0xdc/0x164
__device_attach_driver+0xb8/0x138
bus_for_each_drv+0x80/0xdc
__device_attach+0xa8/0x1b0
device_initial_probe+0x14/0x20
bus_probe_device+0xb0/0xb4
deferred_probe_work_func+0x8c/0xc8
process_one_work+0x208/0x60c
worker_thread+0x244/0x388
kthread+0x150/0x228
ret_from_fork+0x10/0x20
-> #1 (console_owner){..-.}-{0:0}:
console_lock_spinning_enable+0x6c/0x7c
console_flush_all+0x2c8/0x49c
console_unlock+0x70/0x110
vprintk_emit+0x254/0x39c
vprintk_default+0x38/0x44
vprintk+0x28/0x34
_printk+0x5c/0x84
exynos_wkup_irq_set_wake+0x80/0xa4
irq_set_irq_wake+0x164/0x1e0
arizona_irq_set_wake+0x18/0x24
irq_set_irq_wake+0x164/0x1e0
regmap_irq_sync_unlock+0x328/0x530
__irq_put_desc_unlock+0x48/0x4c
irq_set_irq_wake+0x84/0x1e0
arizona_set_irq_wake+0x5c/0x70
wm5110_probe+0x220/0x354 [snd_soc_wm5110]
platform_probe+0x5c/0xac
really_probe+0xbc/0x298
__driver_probe_device+0x78/0x12c
driver_probe_device+0xdc/0x164
__driver_attach+0x9c/0x1ac
bus_for_each_dev+0x74/0xd0
driver_attach+0x24/0x30
bus_add_driver+0xe4/0x208
driver_register+0x60/0x128
__platform_driver_register+0x24/0x30
cs_exit+0xc/0x20 [cpufreq_conservative]
do_one_initcall+0x64/0x308
do_init_module+0x58/0x23c
load_module+0x1b48/0x1dc4
init_module_from_file+0x84/0xc4
idempotent_init_module+0x188/0x280
__arm64_sys_finit_module+0x68/0xac
invoke_syscall+0x48/0x110
el0_svc_.common.c
(system is frozen at this point).
Let me know if I can somehow help debugging this issue. Reverting
$subject on top of linux-next fixes it.
> include/linux/sched.h | 1 -
> kernel/sched/deadline.c | 12 +-----------
> kernel/sched/fair.c | 7 +------
> kernel/sched/sched.h | 4 ----
> 4 files changed, 2 insertions(+), 22 deletions(-)
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index f8188b8..f89313b 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -733,7 +733,6 @@ struct sched_dl_entity {
> * runnable task.
> */
> struct rq *rq;
> - dl_server_has_tasks_f server_has_tasks;
> dl_server_pick_f server_pick_task;
>
> #ifdef CONFIG_RT_MUTEXES
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index f253012..5a5080b 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -875,7 +875,7 @@ static void replenish_dl_entity(struct sched_dl_entity *dl_se)
> */
> if (dl_se->dl_defer && !dl_se->dl_defer_running &&
> dl_time_before(rq_clock(dl_se->rq), dl_se->deadline - dl_se->runtime)) {
> - if (!is_dl_boosted(dl_se) && dl_se->server_has_tasks(dl_se)) {
> + if (!is_dl_boosted(dl_se)) {
>
> /*
> * Set dl_se->dl_defer_armed and dl_throttled variables to
> @@ -1152,8 +1152,6 @@ static void __push_dl_task(struct rq *rq, struct rq_flags *rf)
> /* a defer timer will not be reset if the runtime consumed was < dl_server_min_res */
> static const u64 dl_server_min_res = 1 * NSEC_PER_MSEC;
>
> -static bool dl_server_stopped(struct sched_dl_entity *dl_se);
> -
> static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_dl_entity *dl_se)
> {
> struct rq *rq = rq_of_dl_se(dl_se);
> @@ -1171,12 +1169,6 @@ static enum hrtimer_restart dl_server_timer(struct hrtimer *timer, struct sched_
> if (!dl_se->dl_runtime)
> return HRTIMER_NORESTART;
>
> - if (!dl_se->server_has_tasks(dl_se)) {
> - replenish_dl_entity(dl_se);
> - dl_server_stopped(dl_se);
> - return HRTIMER_NORESTART;
> - }
> -
> if (dl_se->dl_defer_armed) {
> /*
> * First check if the server could consume runtime in background.
> @@ -1625,11 +1617,9 @@ static bool dl_server_stopped(struct sched_dl_entity *dl_se)
> }
>
> void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
> - dl_server_has_tasks_f has_tasks,
> dl_server_pick_f pick_task)
> {
> dl_se->rq = rq;
> - dl_se->server_has_tasks = has_tasks;
> dl_se->server_pick_task = pick_task;
> }
>
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index c4d91e8..59d7dc9 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -8859,11 +8859,6 @@ static struct task_struct *__pick_next_task_fair(struct rq *rq, struct task_stru
> return pick_next_task_fair(rq, prev, NULL);
> }
>
> -static bool fair_server_has_tasks(struct sched_dl_entity *dl_se)
> -{
> - return !!dl_se->rq->cfs.nr_queued;
> -}
> -
> static struct task_struct *fair_server_pick_task(struct sched_dl_entity *dl_se)
> {
> return pick_task_fair(dl_se->rq);
> @@ -8875,7 +8870,7 @@ void fair_server_init(struct rq *rq)
>
> init_dl_entity(dl_se);
>
> - dl_server_init(dl_se, rq, fair_server_has_tasks, fair_server_pick_task);
> + dl_server_init(dl_se, rq, fair_server_pick_task);
> }
>
> /*
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index be9745d..f10d627 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -365,9 +365,6 @@ extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s6
> *
> * dl_se::rq -- runqueue we belong to.
> *
> - * dl_se::server_has_tasks() -- used on bandwidth enforcement; we 'stop' the
> - * server when it runs out of tasks to run.
> - *
> * dl_se::server_pick() -- nested pick_next_task(); we yield the period if this
> * returns NULL.
> *
> @@ -383,7 +380,6 @@ extern void dl_server_update(struct sched_dl_entity *dl_se, s64 delta_exec);
> extern void dl_server_start(struct sched_dl_entity *dl_se);
> extern void dl_server_stop(struct sched_dl_entity *dl_se);
> extern void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
> - dl_server_has_tasks_f has_tasks,
> dl_server_pick_f pick_task);
> extern void sched_init_dl_servers(void);
>
>
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On Mon, Sep 22, 2025 at 11:57:02PM +0200, Marek Szyprowski wrote:
> On 18.09.2025 08:56, tip-bot2 for Peter Zijlstra wrote:
> > The following commit has been merged into the sched/urgent branch of tip:
> >
> > Commit-ID: 077e1e2e0015e5ba6538d1c5299fb299a3a92d60
> > Gitweb: https://git.kernel.org/tip/077e1e2e0015e5ba6538d1c5299fb299a3a92d60
> > Author: Peter Zijlstra <peterz@infradead.org>
> > AuthorDate: Tue, 16 Sep 2025 23:02:41 +02:00
> > Committer: Peter Zijlstra <peterz@infradead.org>
> > CommitterDate: Thu, 18 Sep 2025 08:50:05 +02:00
> >
> > sched/deadline: Fix dl_server getting stuck
> >
> > John found it was easy to hit lockup warnings when running locktorture
> > on a 2 CPU VM, which he bisected down to: commit cccb45d7c429
> > ("sched/deadline: Less agressive dl_server handling").
> >
> > While debugging it seems there is a chance where we end up with the
> > dl_server dequeued, with dl_se->dl_server_active. This causes
> > dl_server_start() to return without enqueueing the dl_server, thus it
> > fails to run when RT tasks starve the cpu.
> >
> > When this happens, dl_server_timer() catches the
> > '!dl_se->server_has_tasks(dl_se)' case, which then calls
> > replenish_dl_entity() and dl_server_stopped() and finally return
> > HRTIMER_NO_RESTART.
> >
> > This ends in no new timer and also no enqueue, leaving the dl_server
> > 'dead', allowing starvation.
> >
> > What should have happened is for the bandwidth timer to start the
> > zero-laxity timer, which in turn would enqueue the dl_server and cause
> > dl_se->server_pick_task() to be called -- which will stop the
> > dl_server if no fair tasks are observed for a whole period.
> >
> > IOW, it is totally irrelevant if there are fair tasks at the moment of
> > bandwidth refresh.
> >
> > This removes all dl_se->server_has_tasks() users, so remove the whole
> > thing.
> >
> > Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
> > Reported-by: John Stultz <jstultz@google.com>
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Tested-by: John Stultz <jstultz@google.com>
> > ---
>
> This patch landed in today's linux-next as commit 077e1e2e0015
> ("sched/deadline: Fix dl_server getting stuck"). In my tests I found
> that it breaks CPU hotplug on some of my systems. On 64bit
> Exynos5433-based TM2e board I've captured the following lock dep warning
> (which unfortunately doesn't look like really related to CPU hotplug):
Right -- I've looked at this patch a few times over the day, and the
only thing I can think of is that we keep the dl_server timer running.
But I already gave you a patch that *should've* stopped it.
There were a few issues with it -- notably if you've booted with
something like isolcpus / nohz_full it might not have worked, because
the site where I put the dl_server_stop() would only get run if there
was a root domain attached to the CPU.
I've put it in a different spot below, just to make sure.
There is also the fact that dl_server_stop() uses
hrtimer_try_to_cancel(), which can 'fail' when the timer is actively
running. But if that is the case, it must be spin-waiting on rq->lock
-- since the caller of dl_server_stop() will be holding that. Once
dl_server_stop() completes and the rq->lock is released, the timer will
see !dl_se->dl_throttled and immediately stop without restarting.
So that *should* not be a problem.
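FWIW, a silly userspace analogue of that pattern -- pthreads and a couple
of flags standing in for the hrtimer, rq->lock and dl_se->dl_throttled;
all names made up, this is not the kernel code:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t rq_lock = PTHREAD_MUTEX_INITIALIZER;
static bool throttled = true;		/* stands in for dl_se->dl_throttled */
static bool rearmed;			/* would the 'timer' have restarted? */

/* Stands in for dl_server_timer(); may already be running when stop() runs. */
static void *timer_fn(void *arg)
{
	(void)arg;
	/* Like the hrtimer callback, block here while the stopper holds rq_lock. */
	pthread_mutex_lock(&rq_lock);
	if (!throttled) {
		/* The stopper won: return without re-arming (HRTIMER_NORESTART). */
		pthread_mutex_unlock(&rq_lock);
		return NULL;
	}
	rearmed = true;			/* normal replenish / re-arm path */
	pthread_mutex_unlock(&rq_lock);
	return NULL;
}

/* Stands in for dl_server_stop(), called with rq_lock already held. */
static void server_stop(void)
{
	/*
	 * hrtimer_try_to_cancel() may 'fail' here because timer_fn() is
	 * already running -- but it is stuck on rq_lock until we release it.
	 */
	throttled = false;
}

int main(void)
{
	pthread_t t;

	pthread_mutex_lock(&rq_lock);		/* caller of dl_server_stop() holds rq->lock */
	pthread_create(&t, NULL, timer_fn, NULL); /* the 'timer' fires concurrently */
	server_stop();
	pthread_mutex_unlock(&rq_lock);

	pthread_join(t, NULL);
	printf("timer re-armed: %s\n", rearmed ? "yes" : "no");	/* always "no" */
	return 0;
}

The point being: even when the cancel 'fails', the handler can only
proceed once the stopper drops the lock, at which point it sees the
cleared flag and does not re-arm.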
Anyway, clutching at straws here etc.
> # for i in /sys/devices/system/cpu/cpu[1-9]; do echo 0 >$i/online; done
> Detected VIPT I-cache on CPU7
> CPU7: Booted secondary processor 0x0000000101 [0x410fd031]
> ------------[ cut here ]------------
> WARNING: CPU: 7 PID: 0 at kernel/rcu/tree.c:4329
> rcutree_report_cpu_starting+0x1e8/0x348
This is really weird; this does indeed look like CPU7 decides to boot
again. AFAICT it is not hotplug failing and bringing the CPU back again,
but it is really starting again.
I'm not well versed enough in ARM64 foo to know what would cause a CPU
to boot -- but on x86_64 this isn't something that would easily happen
by accident.
Not stopping a timer should certainly not be sufficient to cause that --
notably hrtimers_cpu_dying() would have migrated the thing.
> (system is frozen at this point).
The whole lockdep and freezing thing is typically printk choking on
itself.
My personal way around this is these patches here:
git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git debug/experimental
They don't apply cleanly anymore, but the conflict isn't hard; I just
haven't bothered to rebase them yet. They rely on the platform having
earlyprintk configured; then add force_early_printk to your kernel
cmdline to have earlyprintk completely take over.
Typical early serial drivers are lock-free and don't suffer from
lockups.
If you get it to work, you might get more data out of it.
---
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3eb6faa91d06..c0b1dc360e68 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -8363,6 +8363,7 @@ static inline void sched_set_rq_offline(struct rq *rq, int cpu)
BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
set_rq_offline(rq);
}
+ dl_server_stop(&rq->fair_server);
rq_unlock_irqrestore(rq, &rf);
}
On 24.09.2025 00:02, Peter Zijlstra wrote:
> On Mon, Sep 22, 2025 at 11:57:02PM +0200, Marek Szyprowski wrote:
>> On 18.09.2025 08:56, tip-bot2 for Peter Zijlstra wrote:
>>> The following commit has been merged into the sched/urgent branch of tip:
>>>
>>> Commit-ID: 077e1e2e0015e5ba6538d1c5299fb299a3a92d60
>>> Gitweb: https://git.kernel.org/tip/077e1e2e0015e5ba6538d1c5299fb299a3a92d60
>>> Author: Peter Zijlstra <peterz@infradead.org>
>>> AuthorDate: Tue, 16 Sep 2025 23:02:41 +02:00
>>> Committer: Peter Zijlstra <peterz@infradead.org>
>>> CommitterDate: Thu, 18 Sep 2025 08:50:05 +02:00
>>>
>>> sched/deadline: Fix dl_server getting stuck
>>>
>>> John found it was easy to hit lockup warnings when running locktorture
>>> on a 2 CPU VM, which he bisected down to: commit cccb45d7c429
>>> ("sched/deadline: Less agressive dl_server handling").
>>>
>>> While debugging it seems there is a chance where we end up with the
>>> dl_server dequeued, with dl_se->dl_server_active. This causes
>>> dl_server_start() to return without enqueueing the dl_server, thus it
>>> fails to run when RT tasks starve the cpu.
>>>
>>> When this happens, dl_server_timer() catches the
>>> '!dl_se->server_has_tasks(dl_se)' case, which then calls
>>> replenish_dl_entity() and dl_server_stopped() and finally return
>>> HRTIMER_NO_RESTART.
>>>
>>> This ends in no new timer and also no enqueue, leaving the dl_server
>>> 'dead', allowing starvation.
>>>
>>> What should have happened is for the bandwidth timer to start the
>>> zero-laxity timer, which in turn would enqueue the dl_server and cause
>>> dl_se->server_pick_task() to be called -- which will stop the
>>> dl_server if no fair tasks are observed for a whole period.
>>>
>>> IOW, it is totally irrelevant if there are fair tasks at the moment of
>>> bandwidth refresh.
>>>
>>> This removes all dl_se->server_has_tasks() users, so remove the whole
>>> thing.
>>>
>>> Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
>>> Reported-by: John Stultz <jstultz@google.com>
>>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>> Tested-by: John Stultz <jstultz@google.com>
>>> ---
>> This patch landed in today's linux-next as commit 077e1e2e0015
>> ("sched/deadline: Fix dl_server getting stuck"). In my tests I found
>> that it breaks CPU hotplug on some of my systems. On 64bit
>> Exynos5433-based TM2e board I've captured the following lock dep warning
>> (which unfortunately doesn't look like really related to CPU hotplug):
> Right -- I've looked at this patch a few times over the day, and the
> only thing I can think of is that we keep the dl_server timer running.
> But I already gave you a patch that *should've* stopped it.
>
> There were a few issues with it -- notably if you've booted with
> something like isolcpus / nohz_full it might not have worked because the
> site I put the dl_server_stop() would only get ran if there was a root
> domain attached to the CPU.
>
> Put it in a different spot, just to make sure.
>
> There is also the fact that dl_server_stop() uses
> hrtimer_try_to_cancel(), which can 'fail' when the timer is actively
> running. But if that is the case, it must be spin-waiting on rq->lock
> -- since the caller of dl_server_stop() will be holding that. Once
> dl_server_stop() completes and the rq->lock is released, the timer will
> see !dl_se->dl_throttled and immediately stop without restarting.
>
> So that *should* not be a problem.
>
> Anyway, clutching at staws here etc.
>
>> # for i in /sys/devices/system/cpu/cpu[1-9]; do echo 0 >$i/online; done
>> Detected VIPT I-cache on CPU7
>> CPU7: Booted secondary processor 0x0000000101 [0x410fd031]
>> ------------[ cut here ]------------
>> WARNING: CPU: 7 PID: 0 at kernel/rcu/tree.c:4329
>> rcutree_report_cpu_starting+0x1e8/0x348
> This is really weird; this does indeed look like CPU7 decides to boot
> again. AFAICT it is not hotplug failing and bringing the CPU back again,
> but it is really starting again.
>
> I'm not well versed enough in ARM64 foo to know what would cause a CPU
> to boot -- but on x86_64 this isn't something that would easily happen
> by accident.
>
> Not stopping a timer would certainly not be sufficient -- notably
> hrtimers_cpu_dying() would have migrated the thing.
>
>> (system is frozen at this point).
> The whole lockdep and freezing thing is typically printk choking on
> itself.
>
> My personal way around this are these here patches:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git debug/experimental
>
> They don't apply cleanly anymore, but the conflict isn't hard, so I've
> not taken the bother to rebase them yet. It relies on the platform
> having earlyprintk configured, then add force_early_printk to your
> kernel cmdline to have earlyprintk completely take over.
>
> Typical early serial drivers are lock-free and don't suffer from
> lockups.
>
> If you get it to work, you might get more data out of it.
Thanks for the hints, but unfortunately ARM64 doesn't support
earlyprintk, so I was not able to use this method.
However I've played a bit with this code and found that this strange
wake-up of CPU7 seems to be caused by the timer. If I restore the
	if (!dl_se->server_has_tasks(dl_se))
		return HRTIMER_NORESTART;
part in dl_server_timer(), then everything works again as it did before
this patch.
This issue is however not specific to the 64-bit ARM Exynos5433. A
similar lockup happens on 32-bit ARM Exynos5422 boards, although there
is no message in that case. Does this mean that hrtimer handling on
Exynos boards is a bit broken in the context of CPU hotplug? I've never
analyzed that part of the Exynos SoC support. Krzysztof, any chance you
remember how it works?
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On Mon, Sep 22, 2025 at 11:57:02PM +0200, Marek Szyprowski wrote:
> On 18.09.2025 08:56, tip-bot2 for Peter Zijlstra wrote:
> > The following commit has been merged into the sched/urgent branch of tip:
> >
> > Commit-ID: 077e1e2e0015e5ba6538d1c5299fb299a3a92d60
> > Gitweb: https://git.kernel.org/tip/077e1e2e0015e5ba6538d1c5299fb299a3a92d60
> > Author: Peter Zijlstra <peterz@infradead.org>
> > AuthorDate: Tue, 16 Sep 2025 23:02:41 +02:00
> > Committer: Peter Zijlstra <peterz@infradead.org>
> > CommitterDate: Thu, 18 Sep 2025 08:50:05 +02:00
> >
> > sched/deadline: Fix dl_server getting stuck
> >
> > John found it was easy to hit lockup warnings when running locktorture
> > on a 2 CPU VM, which he bisected down to: commit cccb45d7c429
> > ("sched/deadline: Less agressive dl_server handling").
> >
> > While debugging it seems there is a chance where we end up with the
> > dl_server dequeued, with dl_se->dl_server_active. This causes
> > dl_server_start() to return without enqueueing the dl_server, thus it
> > fails to run when RT tasks starve the cpu.
> >
> > When this happens, dl_server_timer() catches the
> > '!dl_se->server_has_tasks(dl_se)' case, which then calls
> > replenish_dl_entity() and dl_server_stopped() and finally return
> > HRTIMER_NO_RESTART.
> >
> > This ends in no new timer and also no enqueue, leaving the dl_server
> > 'dead', allowing starvation.
> >
> > What should have happened is for the bandwidth timer to start the
> > zero-laxity timer, which in turn would enqueue the dl_server and cause
> > dl_se->server_pick_task() to be called -- which will stop the
> > dl_server if no fair tasks are observed for a whole period.
> >
> > IOW, it is totally irrelevant if there are fair tasks at the moment of
> > bandwidth refresh.
> >
> > This removes all dl_se->server_has_tasks() users, so remove the whole
> > thing.
> >
> > Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
> > Reported-by: John Stultz <jstultz@google.com>
> > Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> > Tested-by: John Stultz <jstultz@google.com>
> > ---
>
> This patch landed in today's linux-next as commit 077e1e2e0015
> ("sched/deadline: Fix dl_server getting stuck"). In my tests I found
> that it breaks CPU hotplug on some of my systems. On 64bit
> Exynos5433-based TM2e board I've captured the following lock dep warning
> (which unfortunately doesn't look like really related to CPU hotplug):
Absolutely wild guess; does something like this help?
---
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 18a30ae35441..bf78c46620a5 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12972,6 +12972,8 @@ static void rq_offline_fair(struct rq *rq)
/* Ensure that we remove rq contribution to group share: */
clear_tg_offline_cfs_rqs(rq);
+
+ dl_server_stop(&rq->fair_server);
}
#ifdef CONFIG_SCHED_CORE
On 23.09.2025 09:25, Peter Zijlstra wrote:
> On Mon, Sep 22, 2025 at 11:57:02PM +0200, Marek Szyprowski wrote:
>> On 18.09.2025 08:56, tip-bot2 for Peter Zijlstra wrote:
>>> The following commit has been merged into the sched/urgent branch of tip:
>>>
>>> Commit-ID: 077e1e2e0015e5ba6538d1c5299fb299a3a92d60
>>> Gitweb: https://git.kernel.org/tip/077e1e2e0015e5ba6538d1c5299fb299a3a92d60
>>> Author: Peter Zijlstra <peterz@infradead.org>
>>> AuthorDate: Tue, 16 Sep 2025 23:02:41 +02:00
>>> Committer: Peter Zijlstra <peterz@infradead.org>
>>> CommitterDate: Thu, 18 Sep 2025 08:50:05 +02:00
>>>
>>> sched/deadline: Fix dl_server getting stuck
>>>
>>> John found it was easy to hit lockup warnings when running locktorture
>>> on a 2 CPU VM, which he bisected down to: commit cccb45d7c429
>>> ("sched/deadline: Less agressive dl_server handling").
>>>
>>> While debugging it seems there is a chance where we end up with the
>>> dl_server dequeued, with dl_se->dl_server_active. This causes
>>> dl_server_start() to return without enqueueing the dl_server, thus it
>>> fails to run when RT tasks starve the cpu.
>>>
>>> When this happens, dl_server_timer() catches the
>>> '!dl_se->server_has_tasks(dl_se)' case, which then calls
>>> replenish_dl_entity() and dl_server_stopped() and finally return
>>> HRTIMER_NO_RESTART.
>>>
>>> This ends in no new timer and also no enqueue, leaving the dl_server
>>> 'dead', allowing starvation.
>>>
>>> What should have happened is for the bandwidth timer to start the
>>> zero-laxity timer, which in turn would enqueue the dl_server and cause
>>> dl_se->server_pick_task() to be called -- which will stop the
>>> dl_server if no fair tasks are observed for a whole period.
>>>
>>> IOW, it is totally irrelevant if there are fair tasks at the moment of
>>> bandwidth refresh.
>>>
>>> This removes all dl_se->server_has_tasks() users, so remove the whole
>>> thing.
>>>
>>> Fixes: cccb45d7c4295 ("sched/deadline: Less agressive dl_server handling")
>>> Reported-by: John Stultz <jstultz@google.com>
>>> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
>>> Tested-by: John Stultz <jstultz@google.com>
>>> ---
>> This patch landed in today's linux-next as commit 077e1e2e0015
>> ("sched/deadline: Fix dl_server getting stuck"). In my tests I found
>> that it breaks CPU hotplug on some of my systems. On 64bit
>> Exynos5433-based TM2e board I've captured the following lock dep warning
>> (which unfortunately doesn't look like really related to CPU hotplug):
> Absolutely wild guess; does something like this help?
>
> ---
> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
> index 18a30ae35441..bf78c46620a5 100644
> --- a/kernel/sched/fair.c
> +++ b/kernel/sched/fair.c
> @@ -12972,6 +12972,8 @@ static void rq_offline_fair(struct rq *rq)
>
> /* Ensure that we remove rq contribution to group share: */
> clear_tg_offline_cfs_rqs(rq);
> +
> + dl_server_stop(&rq->fair_server);
> }
>
> #ifdef CONFIG_SCHED_CORE
Unfortunately not, same log.
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On Mon, Sep 22, 2025 at 2:57 PM Marek Szyprowski
<m.szyprowski@samsung.com> wrote:
> This patch landed in today's linux-next as commit 077e1e2e0015
> ("sched/deadline: Fix dl_server getting stuck"). In my tests I found
> that it breaks CPU hotplug on some of my systems. On 64bit
> Exynos5433-based TM2e board I've captured the following lock dep warning
> (which unfortunately doesn't look like really related to CPU hotplug):
>
Huh. Nor does it really look related to the dl_server change. Interesting...
> # for i in /sys/devices/system/cpu/cpu[1-9]; do echo 0 >$i/online; done
> Detected VIPT I-cache on CPU7
> CPU7: Booted secondary processor 0x0000000101 [0x410fd031]
> ------------[ cut here ]------------
> WARNING: CPU: 7 PID: 0 at kernel/rcu/tree.c:4329
> rcutree_report_cpu_starting+0x1e8/0x348
> Modules linked in: brcmfmac_wcc cpufreq_powersave cpufreq_conservative
> brcmfmac brcmutil sha256 snd_soc_wm5110 cfg80211 snd_soc_wm_adsp cs_dsp
> snd_soc_tm2_wm5110 snd_soc_arizona arizona_micsupp phy_exynos5_usbdrd
> s5p_mfc typec arizona_ldo1 hci_uart btqca s5p_jpeg max77693_haptic btbcm
> s3fwrn5_i2c exynos_gsc bluetooth s3fwrn5 nci v4l2_mem2mem nfc
> snd_soc_i2s snd_soc_idma snd_soc_hdmi_codec snd_soc_max98504
> snd_soc_s3c_dma videobuf2_dma_contig videobuf2_memops ecdh_generic
> snd_soc_core ir_spi videobuf2_v4l2 ecc snd_compress ntc_thermistor
> panfrost videodev snd_pcm_dmaengine snd_pcm rfkill drm_shmem_helper
> panel_samsung_s6e3ha2 videobuf2_common backlight pwrseq_core gpu_sched
> mc snd_timer snd soundcore ipv6
> CPU: 7 UID: 0 PID: 0 Comm: swapper/7 Not tainted 6.17.0-rc6+ #16012 PREEMPT
> Hardware name: Samsung TM2E board (DT)
> Hardware name: Samsung TM2E board (DT)
> Detected VIPT I-cache on CPU7
>
> ======================================================
> WARNING: possible circular locking dependency detected
> 6.17.0-rc6+ #16012 Not tainted
> ------------------------------------------------------
> swapper/7/0 is trying to acquire lock:
> ffff000024021cc8 (&irq_desc_lock_class){-.-.}-{2:2}, at:
> __irq_get_desc_lock+0x5c/0x9c
>
> but task is already holding lock:
> ffff800083e479c0 (&port_lock_key){-.-.}-{3:3}, at:
> s3c24xx_serial_console_write+0x80/0x268
>
> which lock already depends on the new lock.
>
>
> the existing dependency chain (in reverse order) is:
>
> -> #2 (&port_lock_key){-.-.}-{3:3}:
> _raw_spin_lock_irqsave+0x60/0x88
> s3c24xx_serial_console_write+0x80/0x268
> console_flush_all+0x304/0x49c
> console_unlock+0x70/0x110
> vprintk_emit+0x254/0x39c
> vprintk_default+0x38/0x44
> vprintk+0x28/0x34
> _printk+0x5c/0x84
> register_console+0x3ac/0x4f8
> serial_core_register_port+0x6c4/0x7a4
> serial_ctrl_register_port+0x10/0x1c
> uart_add_one_port+0x10/0x1c
> s3c24xx_serial_probe+0x34c/0x6d8
> platform_probe+0x5c/0xac
> really_probe+0xbc/0x298
> __driver_probe_device+0x78/0x12c
> driver_probe_device+0xdc/0x164
> __device_attach_driver+0xb8/0x138
> bus_for_each_drv+0x80/0xdc
> __device_attach+0xa8/0x1b0
> device_initial_probe+0x14/0x20
> bus_probe_device+0xb0/0xb4
> deferred_probe_work_func+0x8c/0xc8
> process_one_work+0x208/0x60c
> worker_thread+0x244/0x388
> kthread+0x150/0x228
> ret_from_fork+0x10/0x20
>
> -> #1 (console_owner){..-.}-{0:0}:
> console_lock_spinning_enable+0x6c/0x7c
> console_flush_all+0x2c8/0x49c
> console_unlock+0x70/0x110
> vprintk_emit+0x254/0x39c
> vprintk_default+0x38/0x44
> vprintk+0x28/0x34
> _printk+0x5c/0x84
> exynos_wkup_irq_set_wake+0x80/0xa4
> irq_set_irq_wake+0x164/0x1e0
> arizona_irq_set_wake+0x18/0x24
> irq_set_irq_wake+0x164/0x1e0
> regmap_irq_sync_unlock+0x328/0x530
> __irq_put_desc_unlock+0x48/0x4c
> irq_set_irq_wake+0x84/0x1e0
> arizona_set_irq_wake+0x5c/0x70
> wm5110_probe+0x220/0x354 [snd_soc_wm5110]
> platform_probe+0x5c/0xac
> really_probe+0xbc/0x298
> __driver_probe_device+0x78/0x12c
> driver_probe_device+0xdc/0x164
> __driver_attach+0x9c/0x1ac
> bus_for_each_dev+0x74/0xd0
> driver_attach+0x24/0x30
> bus_add_driver+0xe4/0x208
> driver_register+0x60/0x128
> __platform_driver_register+0x24/0x30
> cs_exit+0xc/0x20 [cpufreq_conservative]
> do_one_initcall+0x64/0x308
> do_init_module+0x58/0x23c
> load_module+0x1b48/0x1dc4
> init_module_from_file+0x84/0xc4
> idempotent_init_module+0x188/0x280
> __arm64_sys_finit_module+0x68/0xac
> invoke_syscall+0x48/0x110
> el0_svc_.common.c
>
> (system is frozen at this point).
So I've seen issues like this when testing scheduler changes,
particularly when I've added debug printks or WARN_ONs that trip while
we're deep in the scheduler core and hold various locks. I reported
something similar here:
https://lore.kernel.org/lkml/CANDhNCo8NRm4meR7vHqvP8vVZ-_GXVPuUKSO1wUQkKdfjvy20w@mail.gmail.com/
Now, usually I'll see the lockdep warning, and the hang is much more rare.
But I don't see right off how the dl_server change would affect this,
other than just changing the timing of execution such that you manage
to trip over the existing issue.
So far I don't see anything similar testing hotplug on x86 qemu. Do
you get any other console messages or warnings prior?
Looking at the backtrace, I wonder if changing the pr_info() in
exynos_wkup_irq_set_wake() to printk_deferred() might avoid this?
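Something like this, completely untested, and with the existing
pr_info() line written from memory so it may not match the actual code
in drivers/pinctrl/samsung/pinctrl-exynos.c:
--- a/drivers/pinctrl/samsung/pinctrl-exynos.c
+++ b/drivers/pinctrl/samsung/pinctrl-exynos.c
@@ exynos_wkup_irq_set_wake() @@
-	pr_info("wake %s for irq %d\n", on ? "enabled" : "disabled", irqd->irq);
+	printk_deferred(KERN_INFO "wake %s for irq %d\n",
+			on ? "enabled" : "disabled", irqd->irq);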
thanks
-john
On 23.09.2025 01:46, John Stultz wrote:
> On Mon, Sep 22, 2025 at 2:57 PM Marek Szyprowski
> <m.szyprowski@samsung.com> wrote:
>> This patch landed in today's linux-next as commit 077e1e2e0015
>> ("sched/deadline: Fix dl_server getting stuck"). In my tests I found
>> that it breaks CPU hotplug on some of my systems. On 64bit
>> Exynos5433-based TM2e board I've captured the following lock dep warning
>> (which unfortunately doesn't look like really related to CPU hotplug):
>>
> Huh. Nor does it really look related to the dl_server change. Interesting...
>
>
>> # for i in /sys/devices/system/cpu/cpu[1-9]; do echo 0 >$i/online; done
>> Detected VIPT I-cache on CPU7
>> CPU7: Booted secondary processor 0x0000000101 [0x410fd031]
>> ------------[ cut here ]------------
>> WARNING: CPU: 7 PID: 0 at kernel/rcu/tree.c:4329
>> rcutree_report_cpu_starting+0x1e8/0x348
>> Modules linked in: brcmfmac_wcc cpufreq_powersave cpufreq_conservative
>> brcmfmac brcmutil sha256 snd_soc_wm5110 cfg80211 snd_soc_wm_adsp cs_dsp
>> snd_soc_tm2_wm5110 snd_soc_arizona arizona_micsupp phy_exynos5_usbdrd
>> s5p_mfc typec arizona_ldo1 hci_uart btqca s5p_jpeg max77693_haptic btbcm
>> s3fwrn5_i2c exynos_gsc bluetooth s3fwrn5 nci v4l2_mem2mem nfc
>> snd_soc_i2s snd_soc_idma snd_soc_hdmi_codec snd_soc_max98504
>> snd_soc_s3c_dma videobuf2_dma_contig videobuf2_memops ecdh_generic
>> snd_soc_core ir_spi videobuf2_v4l2 ecc snd_compress ntc_thermistor
>> panfrost videodev snd_pcm_dmaengine snd_pcm rfkill drm_shmem_helper
>> panel_samsung_s6e3ha2 videobuf2_common backlight pwrseq_core gpu_sched
>> mc snd_timer snd soundcore ipv6
>> CPU: 7 UID: 0 PID: 0 Comm: swapper/7 Not tainted 6.17.0-rc6+ #16012 PREEMPT
>> Hardware name: Samsung TM2E board (DT)
>> Hardware name: Samsung TM2E board (DT)
>> Detected VIPT I-cache on CPU7
>>
>> ======================================================
>> WARNING: possible circular locking dependency detected
>> 6.17.0-rc6+ #16012 Not tainted
>> ------------------------------------------------------
>> swapper/7/0 is trying to acquire lock:
>> ffff000024021cc8 (&irq_desc_lock_class){-.-.}-{2:2}, at:
>> __irq_get_desc_lock+0x5c/0x9c
>>
>> but task is already holding lock:
>> ffff800083e479c0 (&port_lock_key){-.-.}-{3:3}, at:
>> s3c24xx_serial_console_write+0x80/0x268
>>
>> which lock already depends on the new lock.
>>
>>
>> the existing dependency chain (in reverse order) is:
>>
>> -> #2 (&port_lock_key){-.-.}-{3:3}:
>> _raw_spin_lock_irqsave+0x60/0x88
>> s3c24xx_serial_console_write+0x80/0x268
>> console_flush_all+0x304/0x49c
>> console_unlock+0x70/0x110
>> vprintk_emit+0x254/0x39c
>> vprintk_default+0x38/0x44
>> vprintk+0x28/0x34
>> _printk+0x5c/0x84
>> register_console+0x3ac/0x4f8
>> serial_core_register_port+0x6c4/0x7a4
>> serial_ctrl_register_port+0x10/0x1c
>> uart_add_one_port+0x10/0x1c
>> s3c24xx_serial_probe+0x34c/0x6d8
>> platform_probe+0x5c/0xac
>> really_probe+0xbc/0x298
>> __driver_probe_device+0x78/0x12c
>> driver_probe_device+0xdc/0x164
>> __device_attach_driver+0xb8/0x138
>> bus_for_each_drv+0x80/0xdc
>> __device_attach+0xa8/0x1b0
>> device_initial_probe+0x14/0x20
>> bus_probe_device+0xb0/0xb4
>> deferred_probe_work_func+0x8c/0xc8
>> process_one_work+0x208/0x60c
>> worker_thread+0x244/0x388
>> kthread+0x150/0x228
>> ret_from_fork+0x10/0x20
>>
>> -> #1 (console_owner){..-.}-{0:0}:
>> console_lock_spinning_enable+0x6c/0x7c
>> console_flush_all+0x2c8/0x49c
>> console_unlock+0x70/0x110
>> vprintk_emit+0x254/0x39c
>> vprintk_default+0x38/0x44
>> vprintk+0x28/0x34
>> _printk+0x5c/0x84
>> exynos_wkup_irq_set_wake+0x80/0xa4
>> irq_set_irq_wake+0x164/0x1e0
>> arizona_irq_set_wake+0x18/0x24
>> irq_set_irq_wake+0x164/0x1e0
>> regmap_irq_sync_unlock+0x328/0x530
>> __irq_put_desc_unlock+0x48/0x4c
>> irq_set_irq_wake+0x84/0x1e0
>> arizona_set_irq_wake+0x5c/0x70
>> wm5110_probe+0x220/0x354 [snd_soc_wm5110]
>> platform_probe+0x5c/0xac
>> really_probe+0xbc/0x298
>> __driver_probe_device+0x78/0x12c
>> driver_probe_device+0xdc/0x164
>> __driver_attach+0x9c/0x1ac
>> bus_for_each_dev+0x74/0xd0
>> driver_attach+0x24/0x30
>> bus_add_driver+0xe4/0x208
>> driver_register+0x60/0x128
>> __platform_driver_register+0x24/0x30
>> cs_exit+0xc/0x20 [cpufreq_conservative]
>> do_one_initcall+0x64/0x308
>> do_init_module+0x58/0x23c
>> load_module+0x1b48/0x1dc4
>> init_module_from_file+0x84/0xc4
>> idempotent_init_module+0x188/0x280
>> __arm64_sys_finit_module+0x68/0xac
>> invoke_syscall+0x48/0x110
>> el0_svc_.common.c
>>
>> (system is frozen at this point).
> So I've seen issues like this when testing scheduler changes,
> particularly when I've added debug printks or WARN_ONs that trip while
> we're deep in the scheduler core and hold various locks. I reported
> something similar here:
> https://lore.kernel.org/lkml/CANDhNCo8NRm4meR7vHqvP8vVZ-_GXVPuUKSO1wUQkKdfjvy20w@mail.gmail.com/
>
> Now, usually I'll see the lockdep warning, and the hang is much more rare.
>
> But I don't see right off how the dl_server change would affect this,
> other than just changing the timing of execution such that you manage
> to trip over the existing issue.
>
> So far I don't see anything similar testing hotplug on x86 qemu. Do
> you get any other console messages or warnings prior?
Nope. But the most suspicious message there is the 'CPU7: Booted
secondary processor 0x0000000101' line, which I got while off-lining all
non-zero CPUs.
> Looking at the backtrace, I wonder if changing the pr_info() in
> exynos_wkup_irq_set_wake() to printk_deferred() might avoid this?
I've removed that pr_info() from exynos_wkup_irq_set_wake() completely
and now I get the following warning:
# for i in /sys/devices/system/cpu/cpu[1-9]; do echo 0 >$i/online; done
# Detected VIPT I-cache on CPU7
CPU7: Booted secondary processor 0x0000000101 [0x410fd031]
------------[ cut here ]------------
WARNING: CPU: 7 PID: 0 at kernel/rcu/tree.c:4329
rcutree_report_cpu_starting+0x1e8/0x348
Modules linked in: brcmfmac_wcc brcmfmac brcmutil sha256
cpufreq_powersave cpufreq_conservative cfg80211 snd_soc_tm2_wm5110
hci_uart btqca btbcm s3fwrn5_i2c snd_soc_wm5110 bluetooth
arizona_micsupp phy_exynos5_usbdrd s3fwrn5 s5p_mfc nci typec
snd_soc_wm_adsp s5p_jpeg cs_dsp nfc ecdh_generic max77693_haptic
snd_soc_arizona arizona_ldo1 ecc rfkill snd_soc_i2s snd_soc_idma
snd_soc_max98504 snd_soc_hdmi_codec snd_soc_s3c_dma pwrseq_core
snd_soc_core exynos_gsc ir_spi v4l2_mem2mem videobuf2_dma_contig
videobuf2_memops snd_compress snd_pcm_dmaengine videobuf2_v4l2 videodev
ntc_thermistor snd_pcm panfrost videobuf2_common drm_shmem_helper
gpu_sched snd_timer mc panel_samsung_s6e3ha2 backlight snd soundcore ipv6
CPU: 7 UID: 0 PID: 0 Comm: swapper/7 Not tainted 6.17.0-rc6+ #16014
PREEMPT
Hardware name: Samsung TM2E board (DT)
Hardware name: Samsung TM2E board (DT)
Detected VIPT I-cache on CPU7
CPU7: Booted secondary processor 0x0000000103 [0x410fd031]
================================
WARNING: inconsistent lock state
6.17.0-rc6+ #16014 Not tainted
--------------------------------
inconsistent {IN-HARDIRQ-W} -> {HARDIRQ-ON-W} usage.
swapper/7/0 [HC0[0]:SC0[0]:HE0:SE1] takes:
ffff800083e479c0 (&port_lock_key){?.-.}-{3:3}, at:
s3c24xx_serial_console_write+0x80/0x268
{IN-HARDIRQ-W} state was registered at:
lock_acquire+0x1c8/0x354
_raw_spin_lock+0x48/0x60
s3c64xx_serial_handle_irq+0x6c/0x164
__handle_irq_event_percpu+0x9c/0x2d8
handle_irq_event+0x4c/0xac
handle_fasteoi_irq+0x108/0x198
handle_irq_desc+0x40/0x58
generic_handle_domain_irq+0x1c/0x28
gic_handle_irq+0x40/0xc8
call_on_irq_stack+0x30/0x48
do_interrupt_handler+0x80/0x84
el1_interrupt+0x34/0x64
el1h_64_irq_handler+0x18/0x24
el1h_64_irq+0x6c/0x70
default_idle_call+0xac/0x26c
do_idle+0x220/0x284
cpu_startup_entry+0x38/0x3c
rest_init+0xf4/0x184
start_kernel+0x70c/0x7d4
__primary_switched+0x88/0x90
irq event stamp: 63878
hardirqs last enabled at (63877): [<ffff800080121d2c>]
do_idle+0x220/0x284
hardirqs last disabled at (63878): [<ffff80008132f3a4>]
el1_brk64+0x1c/0x54
softirqs last enabled at (63812): [<ffff8000800c1164>]
handle_softirqs+0x4c4/0x4dc
softirqs last disabled at (63807): [<ffff800080010690>]
__do_softirq+0x14/0x20
other info that might help us debug this:
Possible unsafe locking scenario:
CPU0
----
lock(&port_lock_key);
<Interrupt>
lock(&port_lock_key);
*** DEADLOCK ***
5 locks held by swapper/7/0:
#0: ffff800082d0aa98 (console_lock){+.+.}-{0:0}, at:
vprintk_emit+0x150/0x39c
#1: ffff800082d0aaf0 (console_srcu){....}-{0:0}, at:
console_flush_all+0x78/0x49c
#2: ffff800082d0acb0 (console_owner){+.-.}-{0:0}, at:
console_lock_spinning_enable+0x48/0x7c
#3: ffff800082d0acd8
(printk_legacy_map-wait-type-override){+...}-{4:4}, at:
console_flush_all+0x2b0/0x49c
#4: ffff800083e479c0 (&port_lock_key){?.-.}-{3:3}, at:
s3c24xx_serial_console_write+0x80/0x268
stack backtrace:
CPU: 7 UID: 0 PID: 0 Comm: swapper/7 Not tainted 6.17.0-rc6+ #16014
PREEMPT
Hardware name: Samsung TM2E board (DT)
Call trace:
show_stack+0x18/0x24 (C)
dump_stack_lvl+0x90/0xd0
dump_stack+0x18/0x24
print_usage_bug.part.0+0x29c/0x358
mark_lock+0x7bc/0x960
mark_held_locks+0x58/0x90
lockdep_hardirqs_on_prepare+0x104/0x214
trace_hardirqs_on+0x58/0x1d8
secondary_start_kernel+0x134/0x160
__secondary_switched+0xc0/0xc4
------------[ cut here ]------------
WARNING: CPU: 7 PID: 0 at kernel/context_tracking.c:127
ct_kernel_exit.constprop.0+0x120/0x184
Modules linked in: brcmfmac_wcc brcmfmac brcmutil sha256
cpufreq_powersave cpufreq_conservative cfg80211 snd_soc_tm2_wm5110
hci_uart btqca btbcm s3fwrn5_i2c snd_soc_wm5110 bluetooth
arizona_micsupp phy_exynos5_usbdrd s3fwrn5 s5p_mfc nci typec
snd_soc_wm_adsp s5p_jpeg cs_dsp nfc ecdh_generic max77693_haptic
snd_soc_arizona arizona_ldo1 ecc rfkill snd_soc_i2s snd_soc_idma
snd_soc_max98504 snd_soc_hdmi_c
(no more messages, system frozen)
It looks like offlining CPUs 1-7 was successful (there is a prompt
character on the second line), but then CPU7 somehow got onlined again,
which causes this freeze.
Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland
On 18.09.25 08:56, tip-bot2 for Peter Zijlstra wrote:
> The following commit has been merged into the sched/urgent branch of tip:
>
> Commit-ID: 077e1e2e0015e5ba6538d1c5299fb299a3a92d60
> Gitweb: https://git.kernel.org/tip/077e1e2e0015e5ba6538d1c5299fb299a3a92d60
> Author: Peter Zijlstra <peterz@infradead.org>
> AuthorDate: Tue, 16 Sep 2025 23:02:41 +02:00
> Committer: Peter Zijlstra <peterz@infradead.org>
> CommitterDate: Thu, 18 Sep 2025 08:50:05 +02:00
>
> sched/deadline: Fix dl_server getting stuck
>
> John found it was easy to hit lockup warnings when running locktorture
> on a 2 CPU VM, which he bisected down to: commit cccb45d7c429
> ("sched/deadline: Less agressive dl_server handling").
>
> While debugging it seems there is a chance where we end up with the
> dl_server dequeued, with dl_se->dl_server_active. This causes
> dl_server_start() to return without enqueueing the dl_server, thus it
> fails to run when RT tasks starve the cpu.
>
> When this happens, dl_server_timer() catches the
> '!dl_se->server_has_tasks(dl_se)' case, which then calls
> replenish_dl_entity() and dl_server_stopped() and finally return
> HRTIMER_NO_RESTART.
>
> This ends in no new timer and also no enqueue, leaving the dl_server
> 'dead', allowing starvation.
>
> What should have happened is for the bandwidth timer to start the
> zero-laxity timer, which in turn would enqueue the dl_server and cause
> dl_se->server_pick_task() to be called -- which will stop the
> dl_server if no fair tasks are observed for a whole period.
>
> IOW, it is totally irrelevant if there are fair tasks at the moment of
> bandwidth refresh.
>
> This removes all dl_se->server_has_tasks() users, so remove the whole
> thing.
I see the same results as John running his locktorture test; the
'BUG: workqueue lockup' is gone now.
Just got confused by these two remaining dl_server_has_tasks references:
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 73c7de26fa60..73d750292446 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -634,7 +634,6 @@ struct sched_rt_entity {
#endif
} __randomize_layout;
-typedef bool (*dl_server_has_tasks_f)(struct sched_dl_entity *);
typedef struct task_struct *(*dl_server_pick_f)(struct sched_dl_entity *);
struct sched_dl_entity {
@@ -728,9 +727,6 @@ struct sched_dl_entity {
* dl_server_update().
*
* @rq the runqueue this server is for
- *
- * @server_has_tasks() returns true if @server_pick return a
- * runnable task.
*/
struct rq *rq;
dl_server_pick_f server_pick_task;
Can you still tweak the patch to get rid of them as well?
[...]