[PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime

Wangyang Guo posted 1 patch 1 week, 3 days ago
arch/x86/kernel/tsc.c  | 2 --
kernel/sched/clock.c   | 3 +++
kernel/sched/cputime.c | 9 +++++----
kernel/sched/sched.h   | 4 ++--
4 files changed, 10 insertions(+), 8 deletions(-)
[PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by Wangyang Guo 1 week, 3 days ago
Read-mostly sched_clock_irqtime may share the same cacheline with the
frequently updated nohz struct. Convert it to a static_key to avoid the
false sharing issue.

The only user of disable_sched_clock_irqtime()
is tsc_.*mark_unstable(), which may be invoked in atomic context
and requires a workqueue to disable the static_key. But both of them
call clear_sched_clock_stable() just before doing
disable_sched_clock_irqtime(). We can reuse
"sched_clock_work" to also disable sched_clock_irqtime().

One additional case that needs handling is when the tsc is marked unstable
before the late_initcall() phase: sched_clock_work will not be invoked
and sched_clock_irqtime will stay enabled although the clock is unstable:
  tsc_init()
    enable_sched_clock_irqtime() # irqtime accounting is enabled here
    ...
    if (unsynchronized_tsc()) # true
      mark_tsc_unstable()
        clear_sched_clock_stable()
          __sched_clock_stable_early = 0;
          ...
          if (static_key_count(&sched_clock_running.key) == 2)
            # Only happens at sched_clock_init_late()
            __clear_sched_clock_stable(); # Never executed
  ...

  # late_initcall() phase
  sched_clock_init_late()
    if (__sched_clock_stable_early) # Already false
      __set_sched_clock_stable(); # sched_clock is never marked stable
  # TSC unstable, but sched_clock_work won't run to disable irqtime

So we need to disable_sched_clock_irqtime() in sched_clock_init_late()
if clock is unstable.

Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Suggested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Reported-by: Benjamin Lei <benjamin.lei@intel.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Reviewed-by: Tianyou Li <tianyou.li@intel.com>
Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>
---
v7 -> v6:
- move irqtime_enabled() check to disable_sched_clock_irqtime()

v6 -> v5:
- Only disable_sched_clock_irqtime() if irqtime_enabled() in
  sched_clock_init_late() to avoid unnecessary overhead.

V5 -> v4:
- Changelog update to reflect static_key changes

V4 -> V3:
- Avoid creating a new workqueue to disable static_key
- Specify kernel version for c2c result in changelog

V3 -> V2:
- Use static_key instead of a __read_mostly var.

V1 -> V2:
- Use __read_mostly instead of __cacheline_aligned to avoid wasting
  spaces.

History:
  v6: https://lore.kernel.org/all/20260127044159.2254247-1-wangyang.guo@intel.com/
  v5: https://lore.kernel.org/all/20260127031602.1907377-1-wangyang.guo@intel.com/
  v4: https://lore.kernel.org/all/20260126021401.1490163-1-wangyang.guo@intel.com/
  v3: https://lore.kernel.org/all/20260116023945.1849329-1-wangyang.guo@intel.com/
  v2: https://lore.kernel.org/all/20260113074807.3404180-1-wangyang.guo@intel.com/
  v1: https://lore.kernel.org/all/20260113022958.3379650-1-wangyang.guo@intel.com/
  prev discussions: https://lore.kernel.org/all/20251211055612.4071266-1-wangyang.guo@intel.com/T/#u
---
 arch/x86/kernel/tsc.c  | 2 --
 kernel/sched/clock.c   | 3 +++
 kernel/sched/cputime.c | 9 +++++----
 kernel/sched/sched.h   | 4 ++--
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 87e749106dda..9a62e18d1bff 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1142,7 +1142,6 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
 	tsc_unstable = 1;
 	if (using_native_sched_clock())
 		clear_sched_clock_stable();
-	disable_sched_clock_irqtime();
 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
 }
 
@@ -1212,7 +1211,6 @@ void mark_tsc_unstable(char *reason)
 	tsc_unstable = 1;
 	if (using_native_sched_clock())
 		clear_sched_clock_stable();
-	disable_sched_clock_irqtime();
 	pr_info("Marking TSC unstable due to %s\n", reason);
 
 	clocksource_mark_unstable(&clocksource_tsc_early);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index f5e6dd6a6b3a..2ae4fbf13431 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -173,6 +173,7 @@ notrace static void __sched_clock_work(struct work_struct *work)
 			scd->tick_gtod, __gtod_offset,
 			scd->tick_raw,  __sched_clock_offset);
 
+	disable_sched_clock_irqtime();
 	static_branch_disable(&__sched_clock_stable);
 }
 
@@ -238,6 +239,8 @@ static int __init sched_clock_init_late(void)
 
 	if (__sched_clock_stable_early)
 		__set_sched_clock_stable();
+	else
+		disable_sched_clock_irqtime();  /* disable if clock unstable. */
 
 	return 0;
 }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 7097de2c8cda..556a70f344d0 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -12,6 +12,8 @@
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
+DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
+
 /*
  * There are no locks covering percpu hardirq/softirq time.
  * They are only modified in vtime_account, on corresponding CPU
@@ -25,16 +27,15 @@
  */
 DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
 
-int sched_clock_irqtime;
-
 void enable_sched_clock_irqtime(void)
 {
-	sched_clock_irqtime = 1;
+	static_branch_enable(&sched_clock_irqtime);
 }
 
 void disable_sched_clock_irqtime(void)
 {
-	sched_clock_irqtime = 0;
+	if (irqtime_enabled())
+		static_branch_disable(&sched_clock_irqtime);
 }
 
 static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index adfb6e3409d7..ec963314287a 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3172,11 +3172,11 @@ struct irqtime {
 };
 
 DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
-extern int sched_clock_irqtime;
+DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime);
 
 static inline int irqtime_enabled(void)
 {
-	return sched_clock_irqtime;
+	return static_branch_likely(&sched_clock_irqtime);
 }
 
 /*
-- 
2.47.3
Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by Vincent Guittot 3 days, 19 hours ago
On Tue, 27 Jan 2026 at 08:28, Wangyang Guo <wangyang.guo@intel.com> wrote:
>
> Read-mostly sched_clock_irqtime may share the same cacheline with
> frequently updated nohz struct. Make it as static_key to avoid
> false sharing issue.
>
> The only user of disable_sched_clock_irqtime()
> is tsc_.*mark_unstable() which may be invoked under atomic context
> and require a workqueue to disable static_key. But both of them
> calls clear_sched_clock_stable() just before doing
> disable_sched_clock_irqtime(). We can reuse
> "sched_clock_work" to also disable sched_clock_irqtime().
>
> One additional case need to handle is if the tsc is marked unstable
> before late_initcall() phase, sched_clock_work will not be invoked
> and sched_clock_irqtime will stay enabled although clock is unstable:
>   tsc_init()
>     enable_sched_clock_irqtime() # irqtime accounting is enabled here
>     ...
>     if (unsynchronized_tsc()) # true
>       mark_tsc_unstable()
>         clear_sched_clock_stable()
>           __sched_clock_stable_early = 0;
>           ...
>           if (static_key_count(&sched_clock_running.key) == 2)
>             # Only happens at sched_clock_init_late()
>             __clear_sched_clock_stable(); # Never executed
>   ...
>
>   # late_initcall() phase
>   sched_clock_init_late()
>     if (__sched_clock_stable_early) # Already false
>       __set_sched_clock_stable(); # sched_clock is never marked stable
>   # TSC unstable, but sched_clock_work won't run to disable irqtime
>
> So we need to disable_sched_clock_irqtime() in sched_clock_init_late()
> if clock is unstable.
>
> Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
> Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
> Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
> Suggested-by: Peter Zijlstra <peterz@infradead.org>
> Suggested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
> Reported-by: Benjamin Lei <benjamin.lei@intel.com>
> Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
> Reviewed-by: Tianyou Li <tianyou.li@intel.com>
> Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>

Acked-by: Vincent Guittot <vincent.guittot@linaro.org>

> ---
> v7 -> v6:
> - move irqtime_enabled() check to disable_sched_clock_irqtime()
>
> v6 -> v5:
> - Only disable_sched_clock_irqtime() if irqtime_enabled() in
>   sched_lock_init_late() to avoid unnessary overhead.
>
> V5 -> v4:
> - Changelog update to reflect static_key changes
>
> V4 -> V3:
> - Avoid creating a new workqueue to disable static_key
> - Specify kernel version for c2c result in changelog
>
> V2 -> V3:
> - Use static_key instead of a __read_mostly var.
>
> V1 -> V2:
> - Use __read_mostly instead of __cacheline_aligned to avoid wasting
>   spaces.
>
> History:
>   v6: https://lore.kernel.org/all/20260127044159.2254247-1-wangyang.guo@intel.com/
>   v5: https://lore.kernel.org/all/20260127031602.1907377-1-wangyang.guo@intel.com/
>   v4: https://lore.kernel.org/all/20260126021401.1490163-1-wangyang.guo@intel.com/
>   v3: https://lore.kernel.org/all/20260116023945.1849329-1-wangyang.guo@intel.com/
>   v2: https://lore.kernel.org/all/20260113074807.3404180-1-wangyang.guo@intel.com/
>   v1: https://lore.kernel.org/all/20260113022958.3379650-1-wangyang.guo@intel.com/
>   prev discussions: https://lore.kernel.org/all/20251211055612.4071266-1-wangyang.guo@intel.com/T/#u
> ---
>  arch/x86/kernel/tsc.c  | 2 --
>  kernel/sched/clock.c   | 3 +++
>  kernel/sched/cputime.c | 9 +++++----
>  kernel/sched/sched.h   | 4 ++--
>  4 files changed, 10 insertions(+), 8 deletions(-)
>
> diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
> index 87e749106dda..9a62e18d1bff 100644
> --- a/arch/x86/kernel/tsc.c
> +++ b/arch/x86/kernel/tsc.c
> @@ -1142,7 +1142,6 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
>         tsc_unstable = 1;
>         if (using_native_sched_clock())
>                 clear_sched_clock_stable();
> -       disable_sched_clock_irqtime();
>         pr_info("Marking TSC unstable due to clocksource watchdog\n");
>  }
>
> @@ -1212,7 +1211,6 @@ void mark_tsc_unstable(char *reason)
>         tsc_unstable = 1;
>         if (using_native_sched_clock())
>                 clear_sched_clock_stable();
> -       disable_sched_clock_irqtime();
>         pr_info("Marking TSC unstable due to %s\n", reason);
>
>         clocksource_mark_unstable(&clocksource_tsc_early);
> diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
> index f5e6dd6a6b3a..2ae4fbf13431 100644
> --- a/kernel/sched/clock.c
> +++ b/kernel/sched/clock.c
> @@ -173,6 +173,7 @@ notrace static void __sched_clock_work(struct work_struct *work)
>                         scd->tick_gtod, __gtod_offset,
>                         scd->tick_raw,  __sched_clock_offset);
>
> +       disable_sched_clock_irqtime();
>         static_branch_disable(&__sched_clock_stable);
>  }
>
> @@ -238,6 +239,8 @@ static int __init sched_clock_init_late(void)
>
>         if (__sched_clock_stable_early)
>                 __set_sched_clock_stable();
> +       else
> +               disable_sched_clock_irqtime();  /* disable if clock unstable. */
>
>         return 0;
>  }
> diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
> index 7097de2c8cda..556a70f344d0 100644
> --- a/kernel/sched/cputime.c
> +++ b/kernel/sched/cputime.c
> @@ -12,6 +12,8 @@
>
>  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
>
> +DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
> +
>  /*
>   * There are no locks covering percpu hardirq/softirq time.
>   * They are only modified in vtime_account, on corresponding CPU
> @@ -25,16 +27,15 @@
>   */
>  DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
>
> -int sched_clock_irqtime;
> -
>  void enable_sched_clock_irqtime(void)
>  {
> -       sched_clock_irqtime = 1;
> +       static_branch_enable(&sched_clock_irqtime);
>  }
>
>  void disable_sched_clock_irqtime(void)
>  {
> -       sched_clock_irqtime = 0;
> +       if (irqtime_enabled())
> +               static_branch_disable(&sched_clock_irqtime);
>  }
>
>  static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index adfb6e3409d7..ec963314287a 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -3172,11 +3172,11 @@ struct irqtime {
>  };
>
>  DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
> -extern int sched_clock_irqtime;
> +DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime);
>
>  static inline int irqtime_enabled(void)
>  {
> -       return sched_clock_irqtime;
> +       return static_branch_likely(&sched_clock_irqtime);
>  }
>
>  /*
> --
> 2.47.3
>
Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by Shrikanth Hegde 1 week, 3 days ago
Hi Wangyang, Prateek.

On 1/27/26 12:55 PM, Wangyang Guo wrote:
> Read-mostly sched_clock_irqtime may share the same cacheline with
> frequently updated nohz struct. Make it as static_key to avoid
> false sharing issue.
> 
> The only user of disable_sched_clock_irqtime()
> is tsc_.*mark_unstable() which may be invoked under atomic context
> and require a workqueue to disable static_key. But both of them
> calls clear_sched_clock_stable() just before doing
> disable_sched_clock_irqtime(). We can reuse
> "sched_clock_work" to also disable sched_clock_irqtime().
> 
> One additional case need to handle is if the tsc is marked unstable
> before late_initcall() phase, sched_clock_work will not be invoked
> and sched_clock_irqtime will stay enabled although clock is unstable:
>    tsc_init()
>      enable_sched_clock_irqtime() # irqtime accounting is enabled here
>      ...
>      if (unsynchronized_tsc()) # true
>        mark_tsc_unstable()
>          clear_sched_clock_stable()
>            __sched_clock_stable_early = 0;
>            ...
>            if (static_key_count(&sched_clock_running.key) == 2)
>              # Only happens at sched_clock_init_late()
>              __clear_sched_clock_stable(); # Never executed
>    ...
> 
>    # late_initcall() phase
>    sched_clock_init_late()
>      if (__sched_clock_stable_early) # Already false
>        __set_sched_clock_stable(); # sched_clock is never marked stable
>    # TSC unstable, but sched_clock_work won't run to disable irqtime
> 
> So we need to disable_sched_clock_irqtime() in sched_clock_init_late()
> if clock is unstable.
> 

Do you see this as a valid case? Have you tested with CONFIG_PARAVIRT?

Lets say you have a non native sched clock such as kvm_sched_clock_read.
tsc_init -> sets enable_sched_clock_irqtime()
          ->mark_tsc_unstable -> if using_native_sched_clock -> clear_sched_clock_stable

In this case, since clear_sched_clock_stable won't be called you may not disable the
sched clock irqtime since __sched_clock_stable_early is reset only in clear_sched_clock_stable


Bigger concern(maybe) is clock source marked as stable still, though called mark_tsc_unstable in
non native sched clock?

Disclaimer: (just curious, seeing this x86 code for first time, so may not know all paths)
Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by Guo, Wangyang 1 week, 3 days ago
On 1/27/2026 7:04 PM, Shrikanth Hegde wrote:
> Hi Wangyang, Prateek.
> 
> On 1/27/26 12:55 PM, Wangyang Guo wrote:
>> Read-mostly sched_clock_irqtime may share the same cacheline with
>> frequently updated nohz struct. Make it as static_key to avoid
>> false sharing issue.
>>
>> The only user of disable_sched_clock_irqtime()
>> is tsc_.*mark_unstable() which may be invoked under atomic context
>> and require a workqueue to disable static_key. But both of them
>> calls clear_sched_clock_stable() just before doing
>> disable_sched_clock_irqtime(). We can reuse
>> "sched_clock_work" to also disable sched_clock_irqtime().
>>
>> One additional case need to handle is if the tsc is marked unstable
>> before late_initcall() phase, sched_clock_work will not be invoked
>> and sched_clock_irqtime will stay enabled although clock is unstable:
>>    tsc_init()
>>      enable_sched_clock_irqtime() # irqtime accounting is enabled here
>>      ...
>>      if (unsynchronized_tsc()) # true
>>        mark_tsc_unstable()
>>          clear_sched_clock_stable()
>>            __sched_clock_stable_early = 0;
>>            ...
>>            if (static_key_count(&sched_clock_running.key) == 2)
>>              # Only happens at sched_clock_init_late()
>>              __clear_sched_clock_stable(); # Never executed
>>    ...
>>
>>    # late_initcall() phase
>>    sched_clock_init_late()
>>      if (__sched_clock_stable_early) # Already false
>>        __set_sched_clock_stable(); # sched_clock is never marked stable
>>    # TSC unstable, but sched_clock_work won't run to disable irqtime
>>
>> So we need to disable_sched_clock_irqtime() in sched_clock_init_late()
>> if clock is unstable.
>>
> 
> Do you this as a valid case? have you tested with CONFIG_PARAVIRT?
> 
> Lets say you have a non native sched clock such as kvm_sched_clock_read.
> tsc_init -> sets enable_sched_clock_irqtime()
>           ->mark_tsc_unstable -> if using_native_sched_clock -> 
> clear_sched_clock_stable
> 
> In this case, since clear_sched_clock_stable won't be called you may not 
> disable the
> sched clock irqtime since __sched_clock_stable_early is reset only in 
> clear_sched_clock_stable

For hypervisor, I see this path may call clear_sched_clock_stable when 
clock is unstable at init:

   kvm_init_platform() ->
   kvmclock_init() -> kvm_sched_clock_init(stable):
     if (!stable) clear_sched_clock_stable()
     paravirt_set_sched_clock(kvm_sched_clock_read)
> 
> Bigger concern(maybe) is clock source marked as stable still, though 
> called mark_tsc_unstable in
> non native sched clock?
> 
> Disclaimer: (just curious, seeing this x86 code for first time, so may 
> not know all paths)
> 

Yes, when the clock is marked unstable through tsc_.*mark_unstable() with a
non-native sched_clock, clear_sched_clock_stable won't be called, thus
sched_clock_irqtime still stays enabled.

Maybe the dedicated workqueue for sched_clock_irqtime is still needed 
considering this case.
Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by K Prateek Nayak 1 week, 3 days ago
On 1/28/2026 7:49 AM, Guo, Wangyang wrote:
> Yes, when clock mark unstable through tsc_.*mark_unstable() with non-native_sched_clock, clear_sched_clock_stable won't be called, thus sched_clock_irqtime still keep enabled.
> 
> Maybe the dedicated workqueue for sched_clock_irqtime is still needed considering this case.

In that case, shouldn't tsc_init() only enable irqtime when
using_native_sched_clock()? How can tsc_init() make a call on irqtime if
TSC isn't being used as the sched_clock() ultimately?

For kvmclock, if PVCLOCK_TSC_STABLE_BIT is not set, it'll call
clear_sched_clock_stable() at kvm_sched_clock_init() but none of the
other clocksources do so we can assume once we override the sched_clock()
it is up to the sched_clock() provider to deal with the clock stability.

-- 
Thanks and Regards,
Prateek
Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by Shrikanth Hegde 1 week, 2 days ago

On 1/28/26 8:35 AM, K Prateek Nayak wrote:
> On 1/28/2026 7:49 AM, Guo, Wangyang wrote:
>> Yes, when clock mark unstable through tsc_.*mark_unstable() with non-native_sched_clock, clear_sched_clock_stable won't be called, thus sched_clock_irqtime still keep enabled.
>>
>> Maybe the dedicated workqueue for sched_clock_irqtime is still needed considering this case.
> 
> In that case, shouldn't tsc_init() only enable irqtime when
> using_native_sched_clock()? How can tsc_init() make a call on irqtime if
> TSC isn't being used as the sched_clock() ultimately?
> 
> For kvmclock, if PVCLOCK_TSC_STABLE_BIT is not set, it'll call
> clear_sched_clock_stable() at kvm_sched_clock_init() but none of the
> other clocksources do so we can assume once we override the sched_clock()
> it is up to the sched_clock() provider to deal with the clock stability.
> 

I think this would depend if mark_tsc_unstable happens after system boot,
specially while running kvm guest?
Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by K Prateek Nayak 1 week, 2 days ago
On 1/28/2026 11:56 AM, Shrikanth Hegde wrote:
> 
> 
> On 1/28/26 8:35 AM, K Prateek Nayak wrote:
>> On 1/28/2026 7:49 AM, Guo, Wangyang wrote:
>>> Yes, when clock mark unstable through tsc_.*mark_unstable() with non-native_sched_clock, clear_sched_clock_stable won't be called, thus sched_clock_irqtime still keep enabled.
>>>
>>> Maybe the dedicated workqueue for sched_clock_irqtime is still needed considering this case.
>>
>> In that case, shouldn't tsc_init() only enable irqtime when
>> using_native_sched_clock()? How can tsc_init() make a call on irqtime if
>> TSC isn't being used as the sched_clock() ultimately?
>>
>> For kvmclock, if PVCLOCK_TSC_STABLE_BIT is not set, it'll call
>> clear_sched_clock_stable() at kvm_sched_clock_init() but none of the
>> other clocksources do so we can assume once we override the sched_clock()
>> it is up to the sched_clock() provider to deal with the clock stability.
>>
> 
> I think this would depend if mark_tsc_unstable happens after system boot,
> specially while running kvm guest?

I don't see anything on the guest side that would mark the kvmclock as
unstable if host's TSC turns unstable post init and since kvmclock
doesn't set CLOCK_SOURCE_MUST_VERIFY, I doubt if a watchdog runs to
verify it in the guest.

I have the following in the guest:

    $ sudo dmesg | grep -i clock
    [    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
    [    0.000000] kvm-clock: using sched offset of 423259259 cycles
    [    0.000002] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
    [    0.071675] clocksource: refined-jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645519600211568 ns
    [    0.378467] clocksource: hpet: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604467 ns
    [    0.388678] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x398cb1e4d56, max_idle_ns: 881590790753 ns
    [    0.679262] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
    [    0.903121] PTP clock support registered
    [    0.927243] clocksource: Switched to clocksource kvm-clock
    [    0.944986] clocksource: acpi_pm: mask: 0xffffff max_cycles: 0xffffff, max_idle_ns: 2085701024 ns
    [    0.993198] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x398cb1e4d56, max_idle_ns: 881590790753 ns
    [    1.123796] rtc_cmos 00:05: setting system clock to 2026-01-28T07:03:45 UTC (1769583825)
    [    1.155755] sched_clock: Marking stable (940009972, 212965288)->(1171254846, -18279586)
    [    1.712598] clk: Disabling unused clocks

Then I mark TSC unstable on the host

    tsc: Marking TSC unstable due to Faking unreliable TSC!
    TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
    clocksource: Checking clocksource tsc synchronization from CPU 93 to CPUs 0,2,26,75,101,114,118,195.
    sched_clock: Marking unstable (945948313746, 69389667)<-(947618130068, -1600430832)
    clocksource:         CPU 93 check durations 3436ns - 25277ns for clocksource tsc.
    clocksource: Switched to clocksource hpet

And nothing happens in the guest.

    cat /sys/devices/system/clocksource/clocksource0/current_clocksource
    kvm-clock


If I launch the guest after marking host TSC unstable, I see:

    Unstable clock detected, switching default tracing clock to "global"

and I don't get any "sched_clock: Marking stable" messages.

-- 
Thanks and Regards,
Prateek
Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by Shrikanth Hegde 1 week, 2 days ago

On 1/28/26 12:48 PM, K Prateek Nayak wrote:
> On 1/28/2026 11:56 AM, Shrikanth Hegde wrote:
>>
>>
>> On 1/28/26 8:35 AM, K Prateek Nayak wrote:
>>> On 1/28/2026 7:49 AM, Guo, Wangyang wrote:
>>>> Yes, when clock mark unstable through tsc_.*mark_unstable() with non-native_sched_clock, clear_sched_clock_stable won't be called, thus sched_clock_irqtime still keep enabled.
>>>>
>>>> Maybe the dedicated workqueue for sched_clock_irqtime is still needed considering this case.
>>>
>>> In that case, shouldn't tsc_init() only enable irqtime when
>>> using_native_sched_clock()? How can tsc_init() make a call on irqtime if
>>> TSC isn't being used as the sched_clock() ultimately?
>>>
>>> For kvmclock, if PVCLOCK_TSC_STABLE_BIT is not set, it'll call
>>> clear_sched_clock_stable() at kvm_sched_clock_init() but none of the
>>> other clocksources do so we can assume once we override the sched_clock()
>>> it is up to the sched_clock() provider to deal with the clock stability.
>>>
>>
>> I think this would depend if mark_tsc_unstable happens after system boot,
>> specially while running kvm guest?
> 
> I don't see anything on the guest side that would mark the kvmclock as
> unstable if host's TSC turns unstable post init and since kvmclock
> doesn't set CLOCK_SOURCE_MUST_VERIFY, I doubt if a watchdog runs to
> verify it in the guest.
> 
> I have the following in the guest:
> 
>      $ sudo dmesg | grep -i clock
>      [    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
>      [    0.000000] kvm-clock: using sched offset of 423259259 cycles

This means pv_sched_clock is kvm_sched_clock_read from now. and
irqtime is enabled in the guest. right?

>      [    0.000002] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
>      [    0.071675] clocksource: refined-jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645519600211568 ns
>      [    0.378467] clocksource: hpet: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604467 ns
>      [    0.388678] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x398cb1e4d56, max_idle_ns: 881590790753 ns
>      [    0.679262] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
>      [    0.903121] PTP clock support registered
>      [    0.927243] clocksource: Switched to clocksource kvm-clock
>      [    0.944986] clocksource: acpi_pm: mask: 0xffffff max_cycles: 0xffffff, max_idle_ns: 2085701024 ns
>      [    0.993198] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x398cb1e4d56, max_idle_ns: 881590790753 ns
>      [    1.123796] rtc_cmos 00:05: setting system clock to 2026-01-28T07:03:45 UTC (1769583825)
>      [    1.155755] sched_clock: Marking stable (940009972, 212965288)->(1171254846, -18279586)
>      [    1.712598] clk: Disabling unused clocks
> 
> Then I mark TSC unstable on the host
> 
>      tsc: Marking TSC unstable due to Faking unreliable TSC!
>      TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
>      clocksource: Checking clocksource tsc synchronization from CPU 93 to CPUs 0,2,26,75,101,114,118,195.
>      sched_clock: Marking unstable (945948313746, 69389667)<-(947618130068, -1600430832)
>      clocksource:         CPU 93 check durations 3436ns - 25277ns for clocksource tsc.
>      clocksource: Switched to clocksource hpet
> 

so now, using_native_sched_clock should fail in guest? If so, with the patch,
irqtime won't be disabled no?

> And nothing happens in the guest.
> 
>      cat /sys/devices/system/clocksource/clocksource0/current_clocksource
>      kvm-clock
> 
> 
> If I launch the guest after marking host TSC unstable, I see:
> 
>      Unstable clock detected, switching default tracing clock to "global"
> 
> and I don't get any "sched_clock: Marking stable" messages.
> 

Maybe kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) won't be set.
Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by K Prateek Nayak 1 week, 2 days ago
On 1/28/2026 1:02 PM, Shrikanth Hegde wrote:
> 
> 
> On 1/28/26 12:48 PM, K Prateek Nayak wrote:
>> On 1/28/2026 11:56 AM, Shrikanth Hegde wrote:
>>>
>>>
>>> On 1/28/26 8:35 AM, K Prateek Nayak wrote:
>>>> On 1/28/2026 7:49 AM, Guo, Wangyang wrote:
>>>>> Yes, when clock mark unstable through tsc_.*mark_unstable() with non-native_sched_clock, clear_sched_clock_stable won't be called, thus sched_clock_irqtime still keep enabled.
>>>>>
>>>>> Maybe the dedicated workqueue for sched_clock_irqtime is still needed considering this case.
>>>>
>>>> In that case, shouldn't tsc_init() only enable irqtime when
>>>> using_native_sched_clock()? How can tsc_init() make a call on irqtime if
>>>> TSC isn't being used as the sched_clock() ultimately?
>>>>
>>>> For kvmclock, if PVCLOCK_TSC_STABLE_BIT is not set, it'll call
>>>> clear_sched_clock_stable() at kvm_sched_clock_init() but none of the
>>>> other clocksources do so we can assume once we override the sched_clock()
>>>> it is up to the sched_clock() provider to deal with the clock stability.
>>>>
>>>
>>> I think this would depend if mark_tsc_unstable happens after system boot,
>>> specially while running kvm guest?
>>
>> I don't see anything on the guest side that would mark the kvmclock as
>> unstable if host's TSC turns unstable post init and since kvmclock
>> doesn't set CLOCK_SOURCE_MUST_VERIFY, I doubt if a watchdog runs to
>> verify it in the guest.
>>
>> I have the following in the guest:
>>
>>      $ sudo dmesg | grep -i clock
>>      [    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
>>      [    0.000000] kvm-clock: using sched offset of 423259259 cycles
> 
> This means pv_sched_clock is kvm_sched_clock_read from now. and
> irqtime is enabled in the guest. right?

So within the guest today ...

    $ sudo dmesg | grep -i "clock\|tsc"
    [    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
    [    0.000000] kvm-clock: using sched offset of 504626078 cycles

    # kvm_sched_clock_init() happens here so it can potentially do
    # clear_sched_clock_stable() here if !PVCLOCK_TSC_STABLE_BIT.

    [    0.000002] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
    [    0.000004] tsc: Detected 1996.251 MHz processor

    # We enable irqtime here once TSC frequency has been determined
    # without considering using_native_sched_clock()


After that TSC is never selected so we don't care if it is stable
or not since it is not the clocksource - the guest continues on
with unstable sched_clock() but also irqtime enabled since TSC
was calibrated successfully.

> 
>>      [    0.000002] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
>>      [    0.071675] clocksource: refined-jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645519600211568 ns
>>      [    0.378467] clocksource: hpet: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604467 ns
>>      [    0.388678] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x398cb1e4d56, max_idle_ns: 881590790753 ns
>>      [    0.679262] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
>>      [    0.903121] PTP clock support registered
>>      [    0.927243] clocksource: Switched to clocksource kvm-clock
>>      [    0.944986] clocksource: acpi_pm: mask: 0xffffff max_cycles: 0xffffff, max_idle_ns: 2085701024 ns
>>      [    0.993198] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x398cb1e4d56, max_idle_ns: 881590790753 ns
>>      [    1.123796] rtc_cmos 00:05: setting system clock to 2026-01-28T07:03:45 UTC (1769583825)
>>      [    1.155755] sched_clock: Marking stable (940009972, 212965288)->(1171254846, -18279586)
>>      [    1.712598] clk: Disabling unused clocks
>>
>> Then I mark TSC unstable on the host
>>
>>      tsc: Marking TSC unstable due to Faking unreliable TSC!
>>      TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
>>      clocksource: Checking clocksource tsc synchronization from CPU 93 to CPUs 0,2,26,75,101,114,118,195.
>>      sched_clock: Marking unstable (945948313746, 69389667)<-(947618130068, -1600430832)
>>      clocksource:         CPU 93 check durations 3436ns - 25277ns for clocksource tsc.
>>      clocksource: Switched to clocksource hpet
>>
> 
> so now, using_native_sched_clock should fail in guest? If so, with the patch,
> irqtime won't be disabled no?

Ideally yes, but the guest continues using kvmclock without any hitch.
I think the x86 KVM layer has something to ensure stability but I'm
not 100% sure.

Since I don't see "tsc: Marking TSC unstable ..." or "sched_clock:
Marking unstable ..." in the guest, we don't hit the mark_tsc_unstable()
path within the guest which would disable irqtime today, so essentially
the host's TSC turning unstable doesn't seem to affect the guest.

> 
>> And nothing happens in the guest.
>>
>>      cat /sys/devices/system/clocksource/clocksource0/current_clocksource
>>      kvm-clock
>>
>>
>> If I launch the guest after marking host TSC unstable, I see:
>>
>>      Unstable clock detected, switching default tracing clock to "global"
>>
>> and I don't get any "sched_clock: Marking stable" messages.
>>
> 
> Maybe kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) won't be set.

True but that is only for new guests launches past marking TSC unstable
on the host.

-- 
Thanks and Regards,
Prateek

Re: [PATCH v7] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by Shrikanth Hegde 1 week, 2 days ago

On 1/28/26 1:20 PM, K Prateek Nayak wrote:
> On 1/28/2026 1:02 PM, Shrikanth Hegde wrote:
>>
>>
>> On 1/28/26 12:48 PM, K Prateek Nayak wrote:
>>> On 1/28/2026 11:56 AM, Shrikanth Hegde wrote:
>>>>
>>>>
>>>> On 1/28/26 8:35 AM, K Prateek Nayak wrote:
>>>>> On 1/28/2026 7:49 AM, Guo, Wangyang wrote:
>>>>>> Yes, when clock mark unstable through tsc_.*mark_unstable() with non-native_sched_clock, clear_sched_clock_stable won't be called, thus sched_clock_irqtime still keep enabled.
>>>>>>
>>>>>> Maybe the dedicated workqueue for sched_clock_irqtime is still needed considering this case.
>>>>>
>>>>> In that case, shouldn't tsc_init() only enable irqtime when
>>>>> using_native_sched_clock()? How can tsc_init() make a call on irqtime if
>>>>> TSC isn't being used as the sched_clock() ultimately?
>>>>>
>>>>> For kvmclock, if PVCLOCK_TSC_STABLE_BIT is not set, it'll call
>>>>> clear_sched_clock_stable() at kvm_sched_clock_init() but none of the
>>>>> other clocksources do so we can assume once we override the sched_clock()
>>>>> it is up to the sched_clock() provider to deal with the clock stability.
>>>>>
>>>>
>>>> I think this would depend if mark_tsc_unstable happens after system boot,
>>>> specially while running kvm guest?
>>>
>>> I don't see anything on the guest side that would mark the kvmclock as
>>> unstable if host's TSC turns unstable post init and since kvmclock
>>> doesn't set CLOCK_SOURCE_MUST_VERIFY, I doubt if a watchdog runs to
>>> verify it in the guest.
>>>
>>> I have the following in the guest:
>>>
>>>       $ sudo dmesg | grep -i clock
>>>       [    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
>>>       [    0.000000] kvm-clock: using sched offset of 423259259 cycles
>>
>> This means pv_sched_clock is kvm_sched_clock_read from now. and
>> irqtime is enabled in the guest. right?
> 
> So within the guest today ...
> 
>      $ sudo dmesg | grep -i "clock\|tsc"
>      [    0.000000] kvm-clock: Using msrs 4b564d01 and 4b564d00
>      [    0.000000] kvm-clock: using sched offset of 504626078 cycles
> 
>      # kvm_sched_clock_init() happens here so it can potentially do
>      # clear_sched_clock_stable() here if !PVCLOCK_TSC_STABLE_BIT.
> 
>      [    0.000002] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
>      [    0.000004] tsc: Detected 1996.251 MHz processor
> 
>      # We enable irqtime here once TSC frequency has been determined
>      # without considering using_native_sched_clock()
> 
> 
> After that TSC is never selected so we don't care if it is stable
> or not since it is not the clocksource - the guest continues on
> with unstable sched_clock() but also irqtime enabled since TSC
> was calibrated successfully.
> 
>>
>>>       [    0.000002] clocksource: kvm-clock: mask: 0xffffffffffffffff max_cycles: 0x1cd42e4dffb, max_idle_ns: 881590591483 ns
>>>       [    0.071675] clocksource: refined-jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645519600211568 ns
>>>       [    0.378467] clocksource: hpet: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 19112604467 ns
>>>       [    0.388678] clocksource: tsc-early: mask: 0xffffffffffffffff max_cycles: 0x398cb1e4d56, max_idle_ns: 881590790753 ns
>>>       [    0.679262] clocksource: jiffies: mask: 0xffffffff max_cycles: 0xffffffff, max_idle_ns: 7645041785100000 ns
>>>       [    0.903121] PTP clock support registered
>>>       [    0.927243] clocksource: Switched to clocksource kvm-clock
>>>       [    0.944986] clocksource: acpi_pm: mask: 0xffffff max_cycles: 0xffffff, max_idle_ns: 2085701024 ns
>>>       [    0.993198] clocksource: tsc: mask: 0xffffffffffffffff max_cycles: 0x398cb1e4d56, max_idle_ns: 881590790753 ns
>>>       [    1.123796] rtc_cmos 00:05: setting system clock to 2026-01-28T07:03:45 UTC (1769583825)
>>>       [    1.155755] sched_clock: Marking stable (940009972, 212965288)->(1171254846, -18279586)
>>>       [    1.712598] clk: Disabling unused clocks
>>>
>>> Then I mark TSC unstable on the host
>>>
>>>       tsc: Marking TSC unstable due to Faking unreliable TSC!
>>>       TSC found unstable after boot, most likely due to broken BIOS. Use 'tsc=unstable'.
>>>       clocksource: Checking clocksource tsc synchronization from CPU 93 to CPUs 0,2,26,75,101,114,118,195.
>>>       sched_clock: Marking unstable (945948313746, 69389667)<-(947618130068, -1600430832)
>>>       clocksource:         CPU 93 check durations 3436ns - 25277ns for clocksource tsc.
>>>       clocksource: Switched to clocksource hpet
>>>
>>
>> so now, using_native_sched_clock should fail in guest? If so, with the patch,
>> irqtime won't be disabled no?
> 
> Ideally yes, but the guest continues using kvmclock without any hitch.
> I think the x86 KVM layer has something to ensure stability but I'm
> not 100% sure.
> 
> Since I don't see "tsc: Marking TSC unstable ..." or "sched_clock:
> Marking unstable ..." in the guest, we don't hit the mark_tsc_unstable()
> path within the guest which would disable irqtime today, so essentially
> the host's TSC turning unstable doesn't seem to affect the guest.
> 
>>

Okay. Fair enough.
Then v7 should cover all scenarios, I think. With that,

Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>

[tip: sched/core] sched/clock: Avoid false sharing for sched_clock_irqtime
Posted by tip-bot2 for Wangyang Guo 3 days, 18 hours ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     505da6689305b1103e9a8ab6636c6a7cf74cd5b1
Gitweb:        https://git.kernel.org/tip/505da6689305b1103e9a8ab6636c6a7cf74cd5b1
Author:        Wangyang Guo <wangyang.guo@intel.com>
AuthorDate:    Tue, 27 Jan 2026 15:25:09 +08:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Tue, 03 Feb 2026 12:04:19 +01:00

sched/clock: Avoid false sharing for sched_clock_irqtime

Read-mostly sched_clock_irqtime may share the same cacheline with
frequently updated nohz struct. Make it as static_key to avoid
false sharing issue.

The only user of disable_sched_clock_irqtime()
is tsc_.*mark_unstable() which may be invoked under atomic context
and require a workqueue to disable static_key. But both of them
calls clear_sched_clock_stable() just before doing
disable_sched_clock_irqtime(). We can reuse
"sched_clock_work" to also disable sched_clock_irqtime().

One additional case need to handle is if the tsc is marked unstable
before late_initcall() phase, sched_clock_work will not be invoked
and sched_clock_irqtime will stay enabled although clock is unstable:
  tsc_init()
    enable_sched_clock_irqtime() # irqtime accounting is enabled here
    ...
    if (unsynchronized_tsc()) # true
      mark_tsc_unstable()
        clear_sched_clock_stable()
          __sched_clock_stable_early = 0;
          ...
          if (static_key_count(&sched_clock_running.key) == 2)
            # Only happens at sched_clock_init_late()
            __clear_sched_clock_stable(); # Never executed
  ...

  # late_initcall() phase
  sched_clock_init_late()
    if (__sched_clock_stable_early) # Already false
      __set_sched_clock_stable(); # sched_clock is never marked stable
  # TSC unstable, but sched_clock_work won't run to disable irqtime

So we need to disable_sched_clock_irqtime() in sched_clock_init_late()
if clock is unstable.

Reported-by: Benjamin Lei <benjamin.lei@intel.com>
Suggested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Suggested-by: Peter Zijlstra <peterz@infradead.org>
Suggested-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Signed-off-by: Wangyang Guo <wangyang.guo@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: K Prateek Nayak <kprateek.nayak@amd.com>
Reviewed-by: Tim Chen <tim.c.chen@linux.intel.com>
Reviewed-by: Tianyou Li <tianyou.li@intel.com>
Reviewed-by: Shrikanth Hegde <sshegde@linux.ibm.com>
Tested-by: K Prateek Nayak <kprateek.nayak@amd.com>
Link: https://patch.msgid.link/20260127072509.2627346-1-wangyang.guo@intel.com
---
 arch/x86/kernel/tsc.c  |  2 --
 kernel/sched/clock.c   |  3 +++
 kernel/sched/cputime.c |  9 +++++----
 kernel/sched/sched.h   |  4 ++--
 4 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 7d3e13e..7be44b5 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1143,7 +1143,6 @@ static void tsc_cs_mark_unstable(struct clocksource *cs)
 	tsc_unstable = 1;
 	if (using_native_sched_clock())
 		clear_sched_clock_stable();
-	disable_sched_clock_irqtime();
 	pr_info("Marking TSC unstable due to clocksource watchdog\n");
 }
 
@@ -1213,7 +1212,6 @@ void mark_tsc_unstable(char *reason)
 	tsc_unstable = 1;
 	if (using_native_sched_clock())
 		clear_sched_clock_stable();
-	disable_sched_clock_irqtime();
 	pr_info("Marking TSC unstable due to %s\n", reason);
 
 	clocksource_mark_unstable(&clocksource_tsc_early);
diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c
index f5e6dd6..2ae4fbf 100644
--- a/kernel/sched/clock.c
+++ b/kernel/sched/clock.c
@@ -173,6 +173,7 @@ notrace static void __sched_clock_work(struct work_struct *work)
 			scd->tick_gtod, __gtod_offset,
 			scd->tick_raw,  __sched_clock_offset);
 
+	disable_sched_clock_irqtime();
 	static_branch_disable(&__sched_clock_stable);
 }
 
@@ -238,6 +239,8 @@ static int __init sched_clock_init_late(void)
 
 	if (__sched_clock_stable_early)
 		__set_sched_clock_stable();
+	else
+		disable_sched_clock_irqtime();  /* disable if clock unstable. */
 
 	return 0;
 }
diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c
index 4f97896..ff0dfca 100644
--- a/kernel/sched/cputime.c
+++ b/kernel/sched/cputime.c
@@ -12,6 +12,8 @@
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
 
+DEFINE_STATIC_KEY_FALSE(sched_clock_irqtime);
+
 /*
  * There are no locks covering percpu hardirq/softirq time.
  * They are only modified in vtime_account, on corresponding CPU
@@ -25,16 +27,15 @@
  */
 DEFINE_PER_CPU(struct irqtime, cpu_irqtime);
 
-int sched_clock_irqtime;
-
 void enable_sched_clock_irqtime(void)
 {
-	sched_clock_irqtime = 1;
+	static_branch_enable(&sched_clock_irqtime);
 }
 
 void disable_sched_clock_irqtime(void)
 {
-	sched_clock_irqtime = 0;
+	if (irqtime_enabled())
+		static_branch_disable(&sched_clock_irqtime);
 }
 
 static void irqtime_account_delta(struct irqtime *irqtime, u64 delta,
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 2aa4251..a821cc8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3333,11 +3333,11 @@ struct irqtime {
 };
 
 DECLARE_PER_CPU(struct irqtime, cpu_irqtime);
-extern int sched_clock_irqtime;
+DECLARE_STATIC_KEY_FALSE(sched_clock_irqtime);
 
 static inline int irqtime_enabled(void)
 {
-	return sched_clock_irqtime;
+	return static_branch_likely(&sched_clock_irqtime);
 }
 
 /*