include/linux/clocksource.h |  1 +
kernel/time/clocksource.c   | 16 ++++++++++++----
2 files changed, 13 insertions(+), 4 deletions(-)
The clocksource watchdog marks clocksources unstable from within a timer
handler. On x86, this marking involves an on_each_cpu_cond_mask(),
which in turn invokes smp_call_function_many_cond(), which may not be
invoked from a timer handler. Doing so results in:
WARNING: CPU: 3 PID: 0 at kernel/smp.c:815 smp_call_function_many_cond+0x46b/0x4c0
Fix this by deferring the marking to the clocksource watchdog kthread.
Note that marking unstable is already deferred, so deferring it a bit
more should be just fine.
Reported-by: Christian Heusel <christian@heusel.eu>
Closes: https://lore.kernel.org/all/46a200b4-1293-413c-8b73-37f7b50abd1a@heusel.eu/
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: John Stultz <jstultz@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Stephen Boyd <sboyd@kernel.org>
---
V1 -> V2: Fixed subject line.
include/linux/clocksource.h | 1 +
kernel/time/clocksource.c | 16 ++++++++++++----
2 files changed, 13 insertions(+), 4 deletions(-)
diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 65b7c41471c39..a5317a4c07990 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -149,6 +149,7 @@ struct clocksource {
#define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80
#define CLOCK_SOURCE_RESELECT 0x100
#define CLOCK_SOURCE_VERIFY_PERCPU 0x200
+#define CLOCK_SOURCE_DEFERRED_UNSTABLE 0x400
/* simplify initialization of mask field */
#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0)
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 7304d7cf47f2d..60ace6f64c761 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -201,7 +201,8 @@ static void clocksource_change_rating(struct clocksource *cs, int rating)
static void __clocksource_unstable(struct clocksource *cs)
{
- cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG);
+ cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG |
+ CLOCK_SOURCE_DEFERRED_UNSTABLE);
cs->flags |= CLOCK_SOURCE_UNSTABLE;
/*
@@ -215,6 +216,11 @@ static void __clocksource_unstable(struct clocksource *cs)
if (cs->mark_unstable)
cs->mark_unstable(cs);
+}
+
+static void __clocksource_deferred_unstable(struct clocksource *cs)
+{
+ cs->flags |= CLOCK_SOURCE_DEFERRED_UNSTABLE;
/* kick clocksource_watchdog_kthread() */
if (finished_booting)
@@ -236,7 +242,7 @@ void clocksource_mark_unstable(struct clocksource *cs)
if (!(cs->flags & CLOCK_SOURCE_UNSTABLE)) {
if (!list_empty(&cs->list) && list_empty(&cs->wd_list))
list_add(&cs->wd_list, &watchdog_list);
- __clocksource_unstable(cs);
+ __clocksource_deferred_unstable(cs);
}
spin_unlock_irqrestore(&watchdog_lock, flags);
}
@@ -453,7 +459,7 @@ static void clocksource_watchdog(struct timer_list *unused)
if (read_ret == WD_READ_UNSTABLE) {
/* Clock readout unreliable, so give it up. */
- __clocksource_unstable(cs);
+ __clocksource_deferred_unstable(cs);
continue;
}
@@ -538,7 +544,7 @@ static void clocksource_watchdog(struct timer_list *unused)
pr_warn(" '%s' (not '%s') is current clocksource.\n", curr_clocksource->name, cs->name);
else
pr_warn(" No current clocksource.\n");
- __clocksource_unstable(cs);
+ __clocksource_deferred_unstable(cs);
continue;
}
@@ -703,6 +709,8 @@ static int __clocksource_watchdog_kthread(void)
spin_lock_irqsave(&watchdog_lock, flags);
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
+ if (cs->flags & CLOCK_SOURCE_DEFERRED_UNSTABLE)
+ __clocksource_unstable(cs);
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
list_del_init(&cs->wd_list);
clocksource_change_rating(cs, 0);
On Thu, Mar 06 2025 at 08:06, Paul E. McKenney wrote:
> The clocksource watchdog marks clocksources unstable from within a timer
> handler. On x86, this marking involves an on_each_cpu_cond_mask(),
> which in turn invokes smp_call_function_many_cond(), which may not be
> invoked from a timer handler. Doing so results in:
>
> WARNING: CPU: 3 PID: 0 at kernel/smp.c:815 smp_call_function_many_cond+0x46b/0x4c0
>
> Fix this by deferring the marking to the clocksource watchdog kthread.
> Note that marking unstable is already deferred, so deferring it a bit
> more should be just fine.
While this can be done, that's papering over the underlying problem,
which was introduced with:
8722903cbb8f ("sched: Define sched_clock_irqtime as static key")
That added the static key switch, which is causing the problem. And
"fixing" this in the clocksource watchdog is incomplete because the same
problem exists during CPU hotplug when the TSC synchronization declares
the TSC unstable. It's exactly the same problem as was fixed via:
6577e42a3e16 ("sched/clock: Fix up clear_sched_clock_stable()")
So as this got introduced in the 6.14 merge window, the proper fix is to
revert commit 8722903cbb8f and send it back to the drawing board. It was
clearly never tested with the various possibilities which invoke
mark_tsc*_unstable().
Thanks,
tglx
On Sun, Mar 9, 2025 at 12:38 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>
> On Thu, Mar 06 2025 at 08:06, Paul E. McKenney wrote:
> > The clocksource watchdog marks clocksources unstable from within a timer
> > handler. On x86, this marking involves an on_each_cpu_cond_mask(),
> > which in turn invokes smp_call_function_many_cond(), which may not be
> > invoked from a timer handler. Doing so results in:
> >
> > WARNING: CPU: 3 PID: 0 at kernel/smp.c:815 smp_call_function_many_cond+0x46b/0x4c0
> >
> > Fix this by deferring the marking to the clocksource watchdog kthread.
> > Note that marking unstable is already deferred, so deferring it a bit
> > more should be just fine.
>
> While this can be done, that's papering over the underlying problem,
> which was introduced with:
>
> 8722903cbb8f ("sched: Define sched_clock_irqtime as static key")
>
> That added the static key switch, which is causing the problem. And
> "fixing" this in the clocksource watchdog is incomplete because the same
> problem exists during CPU hotplug when the TSC synchronization declares
> the TSC unstable. It's exactly the same problem as was fixed via:
>
> 6577e42a3e16 ("sched/clock: Fix up clear_sched_clock_stable()")
>
> So as this got introduced in the 6.14 merge window, the proper fix is to
> revert commit 8722903cbb8f and send it back to the drawing board. It was
> clearly never tested with the various possibilities which invoke
> mark_tsc*_unstable().
Hello Thomas,
It has been reverted by the following commit
b9f2b29b9494 ("sched: Don't define sched_clock_irqtime as static key")
https://web.git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?id=b9f2b29b94943b08157e3dfc970baabc7944dbc3
--
Regards
Yafang
On Sun, Mar 09 2025 at 19:36, Yafang Shao wrote:
> On Sun, Mar 9, 2025 at 12:38 AM Thomas Gleixner <tglx@linutronix.de> wrote:
>> So as this got introduced in the 6.14 merge window, the proper fix is to
>> revert commit 8722903cbb8f and send it back to the drawing board. It was
>> clearly never tested with the various possibilities which invoke
>> mark_tsc*_unstable().
>
> Hello Thomas,
>
> It has been reverted by the following commit
> b9f2b29b9494 ("sched: Don't define sched_clock_irqtime as static key")
That does not help much because the commit is in the sched/core branch,
which is scheduled for the next merge window. But this wants to be fixed
in Linus tree before 6.14 final. Peter?
Thanks,
tglx
On Sun, Mar 09, 2025 at 05:07:43PM +0100, Thomas Gleixner wrote:
> On Sun, Mar 09 2025 at 19:36, Yafang Shao wrote:
> > On Sun, Mar 9, 2025 at 12:38 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> >> So as this got introduced in the 6.14 merge window, the proper fix is to
> >> revert commit 8722903cbb8f and send it back to the drawing board. It was
> >> clearly never tested with the various possibilities which invoke
> >> mark_tsc*_unstable().
> >
> > Hello Thomas,
> >
> > It has been reverted by the following commit
> > b9f2b29b9494 ("sched: Don't define sched_clock_irqtime as static key")
>
> That does not help much because the commit is in the sched/core branch,
> which is scheduled for the next merge window. But this wants to be fixed
> in Linus tree before 6.14 final. Peter?
Bah, sometimes I get confused by what's where.
I suppose I can pick that one commit into sched/urgent, hmm?
On Sun, Mar 09, 2025 at 07:36:05PM +0800, Yafang Shao wrote:
> On Sun, Mar 9, 2025 at 12:38 AM Thomas Gleixner <tglx@linutronix.de> wrote:
> >
> > On Thu, Mar 06 2025 at 08:06, Paul E. McKenney wrote:
> > > The clocksource watchdog marks clocksources unstable from within a timer
> > > handler. On x86, this marking involves an on_each_cpu_cond_mask(),
> > > which in turn invokes smp_call_function_many_cond(), which may not be
> > > invoked from a timer handler. Doing so results in:
> > >
> > > WARNING: CPU: 3 PID: 0 at kernel/smp.c:815 smp_call_function_many_cond+0x46b/0x4c0
> > >
> > > Fix this by deferring the marking to the clocksource watchdog kthread.
> > > Note that marking unstable is already deferred, so deferring it a bit
> > > more should be just fine.
> >
> > While this can be done, that's papering over the underlying problem,
> > which was introduced with:
> >
> > 8722903cbb8f ("sched: Define sched_clock_irqtime as static key")
> >
> > That added the static key switch, which is causing the problem. And
> > "fixing" this in the clocksource watchdog is incomplete because the same
> > problem exists during CPU hotplug when the TSC synchronization declares
> > the TSC unstable. It's exactly the same problem as was fixed via:
> >
> > 6577e42a3e16 ("sched/clock: Fix up clear_sched_clock_stable()")
> >
> > So as this got introduced in the 6.14 merge window, the proper fix is to
> > revert commit 8722903cbb8f and send it back to the drawing board. It was
> > clearly never tested with the various possibilities which invoke
> > mark_tsc*_unstable().
>
> Hello Thomas,
>
> It has been reverted by the following commit
> b9f2b29b9494 ("sched: Don't define sched_clock_irqtime as static key")
>
> https://web.git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?id=b9f2b29b94943b08157e3dfc970baabc7944dbc3
Thank you! I will drop my commit on my next rebase.
Thanx, Paul
© 2016 - 2026 Red Hat, Inc.