Read-mostly sched_clock_irqtime may share the same cacheline with the
frequently updated nohz struct. Convert it to a static_key to avoid
this false sharing.
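For illustration, a minimal kernel-style sketch of the two read paths
(illustrative names, not part of this patch): the plain int costs a
data load that can false-share with hot neighbours in the same
cacheline, while the static key is patched directly into the
instruction stream and touches no data cacheline at all:

	#include <linux/jump_label.h>

	/* Before: read-mostly flag, laid out next to hot writable data. */
	int example_irqtime_flag;

	static inline int example_enabled_old(void)
	{
		/* Data load; stalls whenever the cacheline is invalidated. */
		return example_irqtime_flag;
	}

	/* After: static key, no data load on the fast path. */
	DEFINE_STATIC_KEY_FALSE(example_irqtime_key);

	static inline int example_enabled_new(void)
	{
		/* Compiles to a patched NOP (disabled) or jump (enabled). */
		return static_branch_likely(&example_irqtime_key);
	}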
The only users of disable_sched_clock_irqtime() are the
tsc_.*mark_unstable() functions, which may be invoked in atomic
context and therefore need a workqueue to disable the static_key.
Both of them call clear_sched_clock_stable() just before calling
disable_sched_clock_irqtime(), so we can reuse "sched_clock_work"
to also disable sched_clock_irqtime().
One additional case needs handling: if the TSC is marked unstable
before the late_initcall() phase, sched_clock_work will not be invoked
and sched_clock_irqtime will stay enabled although the clock is
unstable:
  tsc_init()
    enable_sched_clock_irqtime()  # irqtime accounting is enabled here
    ...
    if (unsynchronized_tsc())     # true
      mark_tsc_unstable()
        clear_sched_clock_stable()
          __sched_clock_stable_early = 0;
          ...
          if (static_key_count(&sched_clock_running.key) == 2)
              # Only happens at sched_clock_init_late()
            __clear_sched_clock_stable();  # Never executed
  ...
  # late_initcall() phase
  sched_clock_init_late()
    if (__sched_clock_stable_early)  # Already false
      __set_sched_clock_stable();    # sched_clock is never marked stable
  # TSC unstable, but sched_clock_work won't run to disable irqtime
So we need to call disable_sched_clock_irqtime() in
sched_clock_init_late() if the clock is unstable.
Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Suggested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Reported-by: Benjamin Lei <benjamin.lei@intel.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Reviewed-by: Tianyou Li <tianyou.li@intel.com>
Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>
---
V4 -> V5:
- Changelog update to reflect static_key changes
V3 -> V4:
- Avoid creating a new workqueue to disable static_key
- Specify kernel version for c2c result in changelog
V2 -> V3:
- Use static_key instead of a __read_mostly var.
V1 -> V2:
- Use __read_mostly instead of __cacheline_aligned to avoid wasting
  space.
History:
v4: https://lore.kernel.org/all/20260126021401.1490163-1-wangyang.guo@intel.com/
v3: https://lore.kernel.org/all/20260116023945.1849329-1-wangyang.guo@intel.com/
v2: https://lore.kernel.org/all/20260113074807.3404180-1-wangyang.guo@intel.com/
v1: https://lore.kernel.org/all/20260113022958.3379650-1-wangyang.guo@intel.com/
prev discussions: https://lore.kernel.org/all/20251211055612.4071266-1-wangyang.guo@intel.com/T/#u
---
arch/x86/kernel/tsc.c | 2 --
kernel/sched/clock.c | 3 +++
kernel/sched/cputime.c | 8 ++++----
kernel/sched/sched.h | 4 ++--
4 files changed, 9 insertions(+), 8 deletions(-)
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 87e749106dda..9a62e18d1bff 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1142,7 +1142,6 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
tsc_unstable = 1;
if (using_native_sched_clock())
clear_sched_clock_stable();
- disable_sched_clock_irqtime();
pr_info("Marking TSC unstable due to clocksource watchdog\n");
}
@@ -1212,7 +1211,6 @@ void mark_tsc_unstable(char *reason)
tsc_unstable = 1;
if (using_native_sched_clock())
clear_sched_clock_stable();
- disable_sched_clock_irqtime();
pr_info("Marking TSC unstable due to %s\n", reason);
clocksource_mark_unstable(&clocksource_tsc_early);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index f5e6dd6a6b3a..2ae4fbf13431 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -173,6 +173,7 @@ notrace static void __sched_clock_work(struct work_struct *work)
scd->tick_gtod, __gtod_offset,
scd->tick_raw, __sched_clock_offset);
+ disable_sched_clock_irqtime();
static_branch_disable(&__sched_clock_stable);
}
@@ -238,6 +239,8 @@ static int __init sched_clock_init_late(void)
if (__sched_clock_stable_early)
__set_sched_clock_stable();
+ else
+ disable_sched_clock_irqtime(); /* disable if clock unstable. */
return 0;
}
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 7097de2c8cda..959a86206c64 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -12,6 +12,8 @@
#ifdef CONFIG_IRQ_TIME_ACCOUNTING
+DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
+
/*
* There are no locks covering percpu hardirq/softirq time.
* They are only modified in vtime_account, on corresponding CPU
@@ -25,16 +27,14 @@
*/
DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
-int sched_clock_irqtime;
-
void enable_sched_clock_irqtime(void)
{
- sched_clock_irqtime = 1;
+ static_branch_enable(&sched_clock_irqtime);
}
void disable_sched_clock_irqtime(void)
{
- sched_clock_irqtime = 0;
+ static_branch_disable(&sched_clock_irqtime);
}
static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index adfb6e3409d7..ec963314287a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3172,11 +3172,11 @@ struct irqtime {
};
DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
-extern int sched_clock_irqtime;
+DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime);
static inline int irqtime_enabled(void)
{
- return sched_clock_irqtime;
+ return static_branch_likely(&sched_clock_irqtime);
}
/*
--
2.47.3
Hello Wangyang,

On 1/27/2026 8:46 AM, Wangyang Guo wrote:
> @@ -238,6 +239,8 @@ static int __init sched_clock_init_late(void)
>
>  	if (__sched_clock_stable_early)
>  		__set_sched_clock_stable();
> +	else
> +		disable_sched_clock_irqtime(); /* disable if clock unstable. */

nit. I think we should check for irqtime_enabled() first, since
static_key_disable() would grab the cpus_read_lock() unnecessarily even
if irqtime wasn't enabled - possible with PA-RISC, where a slow
processor registers the generic sched clock without enabling irqtime
and then marks the sched_clock unstable on SMP, which would hit this
without having irqtime enabled.

Same is the case with "tsc=noirqtime" when the tsc then turns unstable
at early boot.

--
Thanks and Regards,
Prateek
On 1/27/2026 11:38 AM, K Prateek Nayak wrote:
> Hello Wangyang,
>
> On 1/27/2026 8:46 AM, Wangyang Guo wrote:
>> @@ -238,6 +239,8 @@ static int __init sched_clock_init_late(void)
>>
>>  	if (__sched_clock_stable_early)
>>  		__set_sched_clock_stable();
>> +	else
>> +		disable_sched_clock_irqtime(); /* disable if clock unstable. */
>
> nit. I think we should check for irqtime_enabled() first, since
> static_key_disable() would grab the cpus_read_lock() unnecessarily even
> if irqtime wasn't enabled - possible with PA-RISC, where a slow
> processor registers the generic sched clock without enabling irqtime
> and then marks the sched_clock unstable on SMP, which would hit this
> without having irqtime enabled.
>
> Same is the case with "tsc=noirqtime" when the tsc then turns unstable
> at early boot.

Added in v6 as below:
https://lore.kernel.org/all/20260127044159.2254247-1-wangyang.guo@intel.com/

@@ -238,6 +239,8 @@ static int __init sched_clock_init_late(void)

 	if (__sched_clock_stable_early)
 		__set_sched_clock_stable();
+	else if (irqtime_enabled())
+		disable_sched_clock_irqtime(); /* disable if clock unstable. */

 	return 0;
 }