kernel/sched/idle.c | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-)
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
There are two special cases in the idle loop that are handled
inconsistently even though they are analogous.
The first one is when a cpuidle driver is absent and the default CPU
idle time power management implemented by the architecture code is used.
In that case, the scheduler tick is stopped every time before invoking
default_idle_call().
The second one is when a cpuidle driver is present, but there is only
one idle state in its table. In that case, the scheduler tick is never
stopped.
Since each of these approaches leads to suboptimal choices in some
cases, reconcile them with the help of one simple heuristic. Namely,
stop the tick if the CPU has been woken up by it in the previous
iteration of the idle loop, or let it tick otherwise.
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
Based on today's mainline.
---
kernel/sched/idle.c | 30 +++++++++++++++++++++---------
1 file changed, 21 insertions(+), 9 deletions(-)
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_d
return cpuidle_enter(drv, dev, next_state);
}
+static void idle_call_stop_or_retain_tick(bool stop_tick)
+{
+ if (stop_tick || tick_nohz_tick_stopped())
+ tick_nohz_idle_stop_tick();
+ else
+ tick_nohz_idle_retain_tick();
+}
+
/**
* cpuidle_idle_call - the main idle function
*
@@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_d
* set, and it returns with polling set. If it ever stops polling, it
* must clear the polling bit.
*/
-static void cpuidle_idle_call(void)
+static void cpuidle_idle_call(bool stop_tick)
{
struct cpuidle_device *dev = cpuidle_get_device();
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
@@ -186,7 +194,7 @@ static void cpuidle_idle_call(void)
}
if (cpuidle_not_available(drv, dev)) {
- tick_nohz_idle_stop_tick();
+ idle_call_stop_or_retain_tick(stop_tick);
default_idle_call();
goto exit_idle;
@@ -222,17 +230,19 @@ static void cpuidle_idle_call(void)
next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
call_cpuidle(drv, dev, next_state);
} else if (drv->state_count > 1) {
- bool stop_tick = true;
+ /*
+ * stop_tick is expected to be true by default by cpuidle
+ * governors, which allows them to select idle states with
+ * target residency above the tick period length.
+ */
+ stop_tick = true;
/*
* Ask the cpuidle framework to choose a convenient idle state.
*/
next_state = cpuidle_select(drv, dev, &stop_tick);
- if (stop_tick || tick_nohz_tick_stopped())
- tick_nohz_idle_stop_tick();
- else
- tick_nohz_idle_retain_tick();
+ idle_call_stop_or_retain_tick(stop_tick);
entered_state = call_cpuidle(drv, dev, next_state);
/*
@@ -240,7 +250,7 @@ static void cpuidle_idle_call(void)
*/
cpuidle_reflect(dev, entered_state);
} else {
- tick_nohz_idle_retain_tick();
+ idle_call_stop_or_retain_tick(stop_tick);
/*
* If there is only a single idle state (or none), there is
@@ -268,6 +278,7 @@ exit_idle:
static void do_idle(void)
{
int cpu = smp_processor_id();
+ bool got_tick = false;
/*
* Check if we need to update blocked load
@@ -338,8 +349,9 @@ static void do_idle(void)
tick_nohz_idle_restart_tick();
cpu_idle_poll();
} else {
- cpuidle_idle_call();
+ cpuidle_idle_call(got_tick);
}
+ got_tick = tick_nohz_idle_got_tick();
arch_cpu_idle_exit();
}
On Fri, 2026-03-13 at 13:25 +0100, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> There are two special cases in the idle loop that are handled
> inconsistently even though they are analogous.
>
> The first one is when a cpuidle driver is absent and the default CPU
> idle time power management implemented by the architecture code is used.
> In that case, the scheduler tick is stopped every time before invoking
> default_idle_call().
>
> The second one is when a cpuidle driver is present, but there is only
> one idle state in its table. In that case, the scheduler tick is never
> stopped.
>
> Since each of these approaches leads to suboptimal choices in some
> cases, reconcile them with the help of one simple heuristic. Namely,
> stop the tick if the CPU has been woken up by it in the previous
> iteration of the idle loop, or let it tick otherwise.
>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>
> Based on today's mainline.
>
> ---
> kernel/sched/idle.c | 30 +++++++++++++++++++++---------
> 1 file changed, 21 insertions(+), 9 deletions(-)
>
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_d
> return cpuidle_enter(drv, dev, next_state);
> }
>
> +static void idle_call_stop_or_retain_tick(bool stop_tick)
> +{
> + if (stop_tick || tick_nohz_tick_stopped())
> + tick_nohz_idle_stop_tick();
> + else
> + tick_nohz_idle_retain_tick();
> +}
> +
> /**
> * cpuidle_idle_call - the main idle function
> *
> @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_d
> * set, and it returns with polling set. If it ever stops polling, it
> * must clear the polling bit.
> */
> -static void cpuidle_idle_call(void)
> +static void cpuidle_idle_call(bool stop_tick)
> {
> struct cpuidle_device *dev = cpuidle_get_device();
> struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void)
> }
>
> if (cpuidle_not_available(drv, dev)) {
> - tick_nohz_idle_stop_tick();
> + idle_call_stop_or_retain_tick(stop_tick);
>
> default_idle_call();
> goto exit_idle;
> @@ -222,17 +230,19 @@ static void cpuidle_idle_call(void)
> next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
> call_cpuidle(drv, dev, next_state);
> } else if (drv->state_count > 1) {
> - bool stop_tick = true;
> + /*
> + * stop_tick is expected to be true by default by cpuidle
> + * governors, which allows them to select idle states with
> + * target residency above the tick period length.
> + */
> + stop_tick = true;
>
> /*
> * Ask the cpuidle framework to choose a convenient idle state.
> */
> next_state = cpuidle_select(drv, dev, &stop_tick);
>
> - if (stop_tick || tick_nohz_tick_stopped())
> - tick_nohz_idle_stop_tick();
> - else
> - tick_nohz_idle_retain_tick();
> + idle_call_stop_or_retain_tick(stop_tick);
>
> entered_state = call_cpuidle(drv, dev, next_state);
> /*
> @@ -240,7 +250,7 @@ static void cpuidle_idle_call(void)
> */
> cpuidle_reflect(dev, entered_state);
> } else {
> - tick_nohz_idle_retain_tick();
> + idle_call_stop_or_retain_tick(stop_tick);
>
> /*
> * If there is only a single idle state (or none), there is
> @@ -268,6 +278,7 @@ exit_idle:
> static void do_idle(void)
> {
> int cpu = smp_processor_id();
> + bool got_tick = false;
>
> /*
> * Check if we need to update blocked load
> @@ -338,8 +349,9 @@ static void do_idle(void)
> tick_nohz_idle_restart_tick();
> cpu_idle_poll();
> } else {
> - cpuidle_idle_call();
> + cpuidle_idle_call(got_tick);
> }
> + got_tick = tick_nohz_idle_got_tick();
> arch_cpu_idle_exit();
> }
>
>
This unifies the special-case tick handling with a heuristic,
affecting only the no-driver and single-state paths while leaving the
common multi-state governor path behaviorally unchanged.
So,
Reviewed-by: Aboorva Devarajan <aboorvad@linux.ibm.com>
Thanks,
Aboorva
On 03/13/26 13:25, Rafael J. Wysocki wrote: > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > There are two special cases in the idle loop that are handled > inconsistently even though they are analogous. > > The first one is when a cpuidle driver is absent and the default CPU > idle time power management implemented by the architecture code is used. > In that case, the scheduler tick is stopped every time before invoking > default_idle_call(). > > The second one is when a cpuidle driver is present, but there is only > one idle state in its table. In that case, the scheduler tick is never > stopped. > > Since each of these approaches leads to suboptimal choices in some > cases, reconcile them with the help of one simple heuristic. Namely, > stop the tick if the CPU has been woken up by it in the previous > iteration of the idle loop, or let it tick otherwise. > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > --- Thanks for the comment! Reviewed-by: Qais Yousef <qyousef@layalina.io>
Le Fri, Mar 13, 2026 at 01:25:41PM +0100, Rafael J. Wysocki a écrit : > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com> > > There are two special cases in the idle loop that are handled > inconsistently even though they are analogous. > > The first one is when a cpuidle driver is absent and the default CPU > idle time power management implemented by the architecture code is used. > In that case, the scheduler tick is stopped every time before invoking > default_idle_call(). > > The second one is when a cpuidle driver is present, but there is only > one idle state in its table. In that case, the scheduler tick is never > stopped. > > Since each of these approaches leads to suboptimal choices in some > cases, reconcile them with the help of one simple heuristic. Namely, > stop the tick if the CPU has been woken up by it in the previous > iteration of the idle loop, or let it tick otherwise. > > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com> Reviewed-by: Frederic Weisbecker <frederic@kernel.org> -- Frederic Weisbecker SUSE Labs
On 3/13/26 12:25, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> There are two special cases in the idle loop that are handled
> inconsistently even though they are analogous.
>
> The first one is when a cpuidle driver is absent and the default CPU
> idle time power management implemented by the architecture code is used.
> In that case, the scheduler tick is stopped every time before invoking
> default_idle_call().
>
> The second one is when a cpuidle driver is present, but there is only
> one idle state in its table. In that case, the scheduler tick is never
> stopped.
>
> Since each of these approaches leads to suboptimal choices in some
> cases, reconcile them with the help of one simple heuristic. Namely,
> stop the tick if the CPU has been woken up by it in the previous
> iteration of the idle loop, or let it tick otherwise.>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>
> Based on today's mainline.
>
> ---
> kernel/sched/idle.c | 30 +++++++++++++++++++++---------
> 1 file changed, 21 insertions(+), 9 deletions(-)
>
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_d
> return cpuidle_enter(drv, dev, next_state);
> }
>
> +static void idle_call_stop_or_retain_tick(bool stop_tick)
> +{
> + if (stop_tick || tick_nohz_tick_stopped())
> + tick_nohz_idle_stop_tick();
> + else
> + tick_nohz_idle_retain_tick();
> +}
> +
> /**
> * cpuidle_idle_call - the main idle function
> *
> @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_d
> * set, and it returns with polling set. If it ever stops polling, it
> * must clear the polling bit.
> */
> -static void cpuidle_idle_call(void)
> +static void cpuidle_idle_call(bool stop_tick)
> {
> struct cpuidle_device *dev = cpuidle_get_device();
> struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void)
> }
>
> if (cpuidle_not_available(drv, dev)) {
> - tick_nohz_idle_stop_tick();
> + idle_call_stop_or_retain_tick(stop_tick);
>
> default_idle_call();
> goto exit_idle;
> @@ -222,17 +230,19 @@ static void cpuidle_idle_call(void)
> next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
> call_cpuidle(drv, dev, next_state);
> } else if (drv->state_count > 1) {
> - bool stop_tick = true;
> + /*
> + * stop_tick is expected to be true by default by cpuidle
> + * governors, which allows them to select idle states with
> + * target residency above the tick period length.
> + */
> + stop_tick = true;
>
> /*
> * Ask the cpuidle framework to choose a convenient idle state.
> */
> next_state = cpuidle_select(drv, dev, &stop_tick);
>
> - if (stop_tick || tick_nohz_tick_stopped())
> - tick_nohz_idle_stop_tick();
> - else
> - tick_nohz_idle_retain_tick();
> + idle_call_stop_or_retain_tick(stop_tick);
>
> entered_state = call_cpuidle(drv, dev, next_state);
> /*
> @@ -240,7 +250,7 @@ static void cpuidle_idle_call(void)
> */
> cpuidle_reflect(dev, entered_state);
> } else {
> - tick_nohz_idle_retain_tick();
> + idle_call_stop_or_retain_tick(stop_tick);
This would supersede e5c9ffc6ae1b ("cpuidle: Skip governor when only one idle state is available")
so we should remove that code too.
LGTM otherwise.
>
> /*
> * If there is only a single idle state (or none), there is
> @@ -268,6 +278,7 @@ exit_idle:
> static void do_idle(void)
> {
> int cpu = smp_processor_id();
> + bool got_tick = false;
>
> /*
> * Check if we need to update blocked load
> @@ -338,8 +349,9 @@ static void do_idle(void)
> tick_nohz_idle_restart_tick();
> cpu_idle_poll();
> } else {
> - cpuidle_idle_call();
> + cpuidle_idle_call(got_tick);
> }
> + got_tick = tick_nohz_idle_got_tick();
> arch_cpu_idle_exit();
> }
>
>
>
>
On Fri, Mar 13, 2026 at 1:53 PM Christian Loehle
<christian.loehle@arm.com> wrote:
>
> On 3/13/26 12:25, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > There are two special cases in the idle loop that are handled
> > inconsistently even though they are analogous.
> >
> > The first one is when a cpuidle driver is absent and the default CPU
> > idle time power management implemented by the architecture code is used.
> > In that case, the scheduler tick is stopped every time before invoking
> > default_idle_call().
> >
> > The second one is when a cpuidle driver is present, but there is only
> > one idle state in its table. In that case, the scheduler tick is never
> > stopped.
> >
> > Since each of these approaches leads to suboptimal choices in some
> > cases, reconcile them with the help of one simple heuristic. Namely,
> > stop the tick if the CPU has been woken up by it in the previous
> > iteration of the idle loop, or let it tick otherwise.>
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >
> > Based on today's mainline.
> >
> > ---
> > kernel/sched/idle.c | 30 +++++++++++++++++++++---------
> > 1 file changed, 21 insertions(+), 9 deletions(-)
> >
> > --- a/kernel/sched/idle.c
> > +++ b/kernel/sched/idle.c
> > @@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_d
> > return cpuidle_enter(drv, dev, next_state);
> > }
> >
> > +static void idle_call_stop_or_retain_tick(bool stop_tick)
> > +{
> > + if (stop_tick || tick_nohz_tick_stopped())
> > + tick_nohz_idle_stop_tick();
> > + else
> > + tick_nohz_idle_retain_tick();
> > +}
> > +
> > /**
> > * cpuidle_idle_call - the main idle function
> > *
> > @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_d
> > * set, and it returns with polling set. If it ever stops polling, it
> > * must clear the polling bit.
> > */
> > -static void cpuidle_idle_call(void)
> > +static void cpuidle_idle_call(bool stop_tick)
> > {
> > struct cpuidle_device *dev = cpuidle_get_device();
> > struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> > @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void)
> > }
> >
> > if (cpuidle_not_available(drv, dev)) {
> > - tick_nohz_idle_stop_tick();
> > + idle_call_stop_or_retain_tick(stop_tick);
> >
> > default_idle_call();
> > goto exit_idle;
> > @@ -222,17 +230,19 @@ static void cpuidle_idle_call(void)
> > next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
> > call_cpuidle(drv, dev, next_state);
> > } else if (drv->state_count > 1) {
> > - bool stop_tick = true;
> > + /*
> > + * stop_tick is expected to be true by default by cpuidle
> > + * governors, which allows them to select idle states with
> > + * target residency above the tick period length.
> > + */
> > + stop_tick = true;
> >
> > /*
> > * Ask the cpuidle framework to choose a convenient idle state.
> > */
> > next_state = cpuidle_select(drv, dev, &stop_tick);
> >
> > - if (stop_tick || tick_nohz_tick_stopped())
> > - tick_nohz_idle_stop_tick();
> > - else
> > - tick_nohz_idle_retain_tick();
> > + idle_call_stop_or_retain_tick(stop_tick);
> >
> > entered_state = call_cpuidle(drv, dev, next_state);
> > /*
> > @@ -240,7 +250,7 @@ static void cpuidle_idle_call(void)
> > */
> > cpuidle_reflect(dev, entered_state);
> > } else {
> > - tick_nohz_idle_retain_tick();
> > + idle_call_stop_or_retain_tick(stop_tick);
>
> This would supersede e5c9ffc6ae1b ("cpuidle: Skip governor when only one idle state is available")
> so we should remove that code too.
Which code? Do you mean the one that has been removed by
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d557640e4ce589a24dca5ca7ce3b9680f471325f
> LGTM otherwise.
Cool.
> >
> > /*
> > * If there is only a single idle state (or none), there is
> > @@ -268,6 +278,7 @@ exit_idle:
> > static void do_idle(void)
> > {
> > int cpu = smp_processor_id();
> > + bool got_tick = false;
> >
> > /*
> > * Check if we need to update blocked load
> > @@ -338,8 +349,9 @@ static void do_idle(void)
> > tick_nohz_idle_restart_tick();
> > cpu_idle_poll();
> > } else {
> > - cpuidle_idle_call();
> > + cpuidle_idle_call(got_tick);
> > }
> > + got_tick = tick_nohz_idle_got_tick();
> > arch_cpu_idle_exit();
> > }
> >
> >
> >
> >
>
On 3/13/26 13:07, Rafael J. Wysocki wrote:
> On Fri, Mar 13, 2026 at 1:53 PM Christian Loehle
> <christian.loehle@arm.com> wrote:
>>
>> On 3/13/26 12:25, Rafael J. Wysocki wrote:
>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>
>>> There are two special cases in the idle loop that are handled
>>> inconsistently even though they are analogous.
>>>
>>> The first one is when a cpuidle driver is absent and the default CPU
>>> idle time power management implemented by the architecture code is used.
>>> In that case, the scheduler tick is stopped every time before invoking
>>> default_idle_call().
>>>
>>> The second one is when a cpuidle driver is present, but there is only
>>> one idle state in its table. In that case, the scheduler tick is never
>>> stopped.
>>>
>>> Since each of these approaches leads to suboptimal choices in some
>>> cases, reconcile them with the help of one simple heuristic. Namely,
>>> stop the tick if the CPU has been woken up by it in the previous
>>> iteration of the idle loop, or let it tick otherwise.>
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>> ---
>>>
>>> Based on today's mainline.
>>>
>>> ---
>>> kernel/sched/idle.c | 30 +++++++++++++++++++++---------
>>> 1 file changed, 21 insertions(+), 9 deletions(-)
>>>
>>> --- a/kernel/sched/idle.c
>>> +++ b/kernel/sched/idle.c
>>> @@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_d
>>> return cpuidle_enter(drv, dev, next_state);
>>> }
>>>
>>> +static void idle_call_stop_or_retain_tick(bool stop_tick)
>>> +{
>>> + if (stop_tick || tick_nohz_tick_stopped())
>>> + tick_nohz_idle_stop_tick();
>>> + else
>>> + tick_nohz_idle_retain_tick();
>>> +}
>>> +
>>> /**
>>> * cpuidle_idle_call - the main idle function
>>> *
>>> @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_d
>>> * set, and it returns with polling set. If it ever stops polling, it
>>> * must clear the polling bit.
>>> */
>>> -static void cpuidle_idle_call(void)
>>> +static void cpuidle_idle_call(bool stop_tick)
>>> {
>>> struct cpuidle_device *dev = cpuidle_get_device();
>>> struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
>>> @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void)
>>> }
>>>
>>> if (cpuidle_not_available(drv, dev)) {
>>> - tick_nohz_idle_stop_tick();
>>> + idle_call_stop_or_retain_tick(stop_tick);
>>>
>>> default_idle_call();
>>> goto exit_idle;
>>> @@ -222,17 +230,19 @@ static void cpuidle_idle_call(void)
>>> next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
>>> call_cpuidle(drv, dev, next_state);
>>> } else if (drv->state_count > 1) {
>>> - bool stop_tick = true;
>>> + /*
>>> + * stop_tick is expected to be true by default by cpuidle
>>> + * governors, which allows them to select idle states with
>>> + * target residency above the tick period length.
>>> + */
>>> + stop_tick = true;
>>>
>>> /*
>>> * Ask the cpuidle framework to choose a convenient idle state.
>>> */
>>> next_state = cpuidle_select(drv, dev, &stop_tick);
>>>
>>> - if (stop_tick || tick_nohz_tick_stopped())
>>> - tick_nohz_idle_stop_tick();
>>> - else
>>> - tick_nohz_idle_retain_tick();
>>> + idle_call_stop_or_retain_tick(stop_tick);
>>>
>>> entered_state = call_cpuidle(drv, dev, next_state);
>>> /*
>>> @@ -240,7 +250,7 @@ static void cpuidle_idle_call(void)
>>> */
>>> cpuidle_reflect(dev, entered_state);
>>> } else {
>>> - tick_nohz_idle_retain_tick();
>>> + idle_call_stop_or_retain_tick(stop_tick);
>>
>> This would supersede e5c9ffc6ae1b ("cpuidle: Skip governor when only one idle state is available")
>> so we should remove that code too.
>
> Which code? Do you mean the one that has been removed by
>
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d557640e4ce589a24dca5ca7ce3b9680f471325f
>
Ah of course, sorry!
Reviewed-by: Christian Loehle <christian.loehle@arm.com>
On Fri, Mar 13, 2026 at 3:04 PM Christian Loehle
<christian.loehle@arm.com> wrote:
>
> On 3/13/26 13:07, Rafael J. Wysocki wrote:
> > On Fri, Mar 13, 2026 at 1:53 PM Christian Loehle
> > <christian.loehle@arm.com> wrote:
> >>
> >> On 3/13/26 12:25, Rafael J. Wysocki wrote:
> >>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>
> >>> There are two special cases in the idle loop that are handled
> >>> inconsistently even though they are analogous.
> >>>
> >>> The first one is when a cpuidle driver is absent and the default CPU
> >>> idle time power management implemented by the architecture code is used.
> >>> In that case, the scheduler tick is stopped every time before invoking
> >>> default_idle_call().
> >>>
> >>> The second one is when a cpuidle driver is present, but there is only
> >>> one idle state in its table. In that case, the scheduler tick is never
> >>> stopped.
> >>>
> >>> Since each of these approaches leads to suboptimal choices in some
> >>> cases, reconcile them with the help of one simple heuristic. Namely,
> >>> stop the tick if the CPU has been woken up by it in the previous
> >>> iteration of the idle loop, or let it tick otherwise.>
> >>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>> ---
> >>>
> >>> Based on today's mainline.
> >>>
> >>> ---
> >>> kernel/sched/idle.c | 30 +++++++++++++++++++++---------
> >>> 1 file changed, 21 insertions(+), 9 deletions(-)
> >>>
> >>> --- a/kernel/sched/idle.c
> >>> +++ b/kernel/sched/idle.c
> >>> @@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_d
> >>> return cpuidle_enter(drv, dev, next_state);
> >>> }
> >>>
> >>> +static void idle_call_stop_or_retain_tick(bool stop_tick)
> >>> +{
> >>> + if (stop_tick || tick_nohz_tick_stopped())
> >>> + tick_nohz_idle_stop_tick();
> >>> + else
> >>> + tick_nohz_idle_retain_tick();
> >>> +}
> >>> +
> >>> /**
> >>> * cpuidle_idle_call - the main idle function
> >>> *
> >>> @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_d
> >>> * set, and it returns with polling set. If it ever stops polling, it
> >>> * must clear the polling bit.
> >>> */
> >>> -static void cpuidle_idle_call(void)
> >>> +static void cpuidle_idle_call(bool stop_tick)
> >>> {
> >>> struct cpuidle_device *dev = cpuidle_get_device();
> >>> struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> >>> @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void)
> >>> }
> >>>
> >>> if (cpuidle_not_available(drv, dev)) {
> >>> - tick_nohz_idle_stop_tick();
> >>> + idle_call_stop_or_retain_tick(stop_tick);
> >>>
> >>> default_idle_call();
> >>> goto exit_idle;
> >>> @@ -222,17 +230,19 @@ static void cpuidle_idle_call(void)
> >>> next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
> >>> call_cpuidle(drv, dev, next_state);
> >>> } else if (drv->state_count > 1) {
> >>> - bool stop_tick = true;
> >>> + /*
> >>> + * stop_tick is expected to be true by default by cpuidle
> >>> + * governors, which allows them to select idle states with
> >>> + * target residency above the tick period length.
> >>> + */
> >>> + stop_tick = true;
> >>>
> >>> /*
> >>> * Ask the cpuidle framework to choose a convenient idle state.
> >>> */
> >>> next_state = cpuidle_select(drv, dev, &stop_tick);
> >>>
> >>> - if (stop_tick || tick_nohz_tick_stopped())
> >>> - tick_nohz_idle_stop_tick();
> >>> - else
> >>> - tick_nohz_idle_retain_tick();
> >>> + idle_call_stop_or_retain_tick(stop_tick);
> >>>
> >>> entered_state = call_cpuidle(drv, dev, next_state);
> >>> /*
> >>> @@ -240,7 +250,7 @@ static void cpuidle_idle_call(void)
> >>> */
> >>> cpuidle_reflect(dev, entered_state);
> >>> } else {
> >>> - tick_nohz_idle_retain_tick();
> >>> + idle_call_stop_or_retain_tick(stop_tick);
> >>
> >> This would supersede e5c9ffc6ae1b ("cpuidle: Skip governor when only one idle state is available")
> >> so we should remove that code too.
> >
> > Which code? Do you mean the one that has been removed by
> >
> > https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d557640e4ce589a24dca5ca7ce3b9680f471325f
> >
> Ah of course, sorry!
No worries.
> Reviewed-by: Christian Loehle <christian.loehle@arm.com>
So should I regard this as a fix for 7.0?
I guess so because I don't think it would be useful to ship 7.0
without it only to change the behavior immediately in 7.1. And I
think that it can be treated as a fix for e5c9ffc6ae1b (above).
On 3/13/26 15:28, Rafael J. Wysocki wrote:
> On Fri, Mar 13, 2026 at 3:04 PM Christian Loehle
> <christian.loehle@arm.com> wrote:
>>
>> On 3/13/26 13:07, Rafael J. Wysocki wrote:
>>> On Fri, Mar 13, 2026 at 1:53 PM Christian Loehle
>>> <christian.loehle@arm.com> wrote:
>>>>
>>>> On 3/13/26 12:25, Rafael J. Wysocki wrote:
>>>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>>>
>>>>> There are two special cases in the idle loop that are handled
>>>>> inconsistently even though they are analogous.
>>>>>
>>>>> The first one is when a cpuidle driver is absent and the default CPU
>>>>> idle time power management implemented by the architecture code is used.
>>>>> In that case, the scheduler tick is stopped every time before invoking
>>>>> default_idle_call().
>>>>>
>>>>> The second one is when a cpuidle driver is present, but there is only
>>>>> one idle state in its table. In that case, the scheduler tick is never
>>>>> stopped.
>>>>>
>>>>> Since each of these approaches leads to suboptimal choices in some
>>>>> cases, reconcile them with the help of one simple heuristic. Namely,
>>>>> stop the tick if the CPU has been woken up by it in the previous
>>>>> iteration of the idle loop, or let it tick otherwise.>
>>>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>>> ---
>>>>>
>>>>> Based on today's mainline.
>>>>>
>>>>> ---
>>>>> kernel/sched/idle.c | 30 +++++++++++++++++++++---------
>>>>> 1 file changed, 21 insertions(+), 9 deletions(-)
>>>>>
>>>>> --- a/kernel/sched/idle.c
>>>>> +++ b/kernel/sched/idle.c
>>>>> @@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_d
>>>>> return cpuidle_enter(drv, dev, next_state);
>>>>> }
>>>>>
>>>>> +static void idle_call_stop_or_retain_tick(bool stop_tick)
>>>>> +{
>>>>> + if (stop_tick || tick_nohz_tick_stopped())
>>>>> + tick_nohz_idle_stop_tick();
>>>>> + else
>>>>> + tick_nohz_idle_retain_tick();
>>>>> +}
>>>>> +
>>>>> /**
>>>>> * cpuidle_idle_call - the main idle function
>>>>> *
>>>>> @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_d
>>>>> * set, and it returns with polling set. If it ever stops polling, it
>>>>> * must clear the polling bit.
>>>>> */
>>>>> -static void cpuidle_idle_call(void)
>>>>> +static void cpuidle_idle_call(bool stop_tick)
>>>>> {
>>>>> struct cpuidle_device *dev = cpuidle_get_device();
>>>>> struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
>>>>> @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void)
>>>>> }
>>>>>
>>>>> if (cpuidle_not_available(drv, dev)) {
>>>>> - tick_nohz_idle_stop_tick();
>>>>> + idle_call_stop_or_retain_tick(stop_tick);
>>>>>
>>>>> default_idle_call();
>>>>> goto exit_idle;
>>>>> @@ -222,17 +230,19 @@ static void cpuidle_idle_call(void)
>>>>> next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
>>>>> call_cpuidle(drv, dev, next_state);
>>>>> } else if (drv->state_count > 1) {
>>>>> - bool stop_tick = true;
>>>>> + /*
>>>>> + * stop_tick is expected to be true by default by cpuidle
>>>>> + * governors, which allows them to select idle states with
>>>>> + * target residency above the tick period length.
>>>>> + */
>>>>> + stop_tick = true;
>>>>>
>>>>> /*
>>>>> * Ask the cpuidle framework to choose a convenient idle state.
>>>>> */
>>>>> next_state = cpuidle_select(drv, dev, &stop_tick);
>>>>>
>>>>> - if (stop_tick || tick_nohz_tick_stopped())
>>>>> - tick_nohz_idle_stop_tick();
>>>>> - else
>>>>> - tick_nohz_idle_retain_tick();
>>>>> + idle_call_stop_or_retain_tick(stop_tick);
>>>>>
>>>>> entered_state = call_cpuidle(drv, dev, next_state);
>>>>> /*
>>>>> @@ -240,7 +250,7 @@ static void cpuidle_idle_call(void)
>>>>> */
>>>>> cpuidle_reflect(dev, entered_state);
>>>>> } else {
>>>>> - tick_nohz_idle_retain_tick();
>>>>> + idle_call_stop_or_retain_tick(stop_tick);
>>>>
>>>> This would supersede e5c9ffc6ae1b ("cpuidle: Skip governor when only one idle state is available")
>>>> so we should remove that code too.
>>>
>>> Which code? Do you mean the one that has been removed by
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d557640e4ce589a24dca5ca7ce3b9680f471325f
>>>
>> Ah of course, sorry!
>
> No worries.
>
>> Reviewed-by: Christian Loehle <christian.loehle@arm.com>
>
> So should I regard this as a fix for 7.0?
>
> I guess so because I don't think it would be useful to ship 7.0
> without it only to change the behavior immediately in 7.1. And I
> think that it can be treated as a fix for e5c9ffc6ae1b (above).
Yes I think it being a fix for e5c9ffc6ae1b should be fine (fingers crossed).
On Fri, Mar 13, 2026 at 4:45 PM Christian Loehle
<christian.loehle@arm.com> wrote:
>
> On 3/13/26 15:28, Rafael J. Wysocki wrote:
> > On Fri, Mar 13, 2026 at 3:04 PM Christian Loehle
> > <christian.loehle@arm.com> wrote:
> >>
> >> On 3/13/26 13:07, Rafael J. Wysocki wrote:
> >>> On Fri, Mar 13, 2026 at 1:53 PM Christian Loehle
> >>> <christian.loehle@arm.com> wrote:
> >>>>
> >>>> On 3/13/26 12:25, Rafael J. Wysocki wrote:
> >>>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>>>
> >>>>> There are two special cases in the idle loop that are handled
> >>>>> inconsistently even though they are analogous.
> >>>>>
> >>>>> The first one is when a cpuidle driver is absent and the default CPU
> >>>>> idle time power management implemented by the architecture code is used.
> >>>>> In that case, the scheduler tick is stopped every time before invoking
> >>>>> default_idle_call().
> >>>>>
> >>>>> The second one is when a cpuidle driver is present, but there is only
> >>>>> one idle state in its table. In that case, the scheduler tick is never
> >>>>> stopped.
> >>>>>
> >>>>> Since each of these approaches leads to suboptimal choices in some
> >>>>> cases, reconcile them with the help of one simple heuristic. Namely,
> >>>>> stop the tick if the CPU has been woken up by it in the previous
> >>>>> iteration of the idle loop, or let it tick otherwise.>
> >>>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >>>>> ---
> >>>>>
> >>>>> Based on today's mainline.
> >>>>>
> >>>>> ---
> >>>>> kernel/sched/idle.c | 30 +++++++++++++++++++++---------
> >>>>> 1 file changed, 21 insertions(+), 9 deletions(-)
> >>>>>
> >>>>> --- a/kernel/sched/idle.c
> >>>>> +++ b/kernel/sched/idle.c
> >>>>> @@ -161,6 +161,14 @@ static int call_cpuidle(struct cpuidle_d
> >>>>> return cpuidle_enter(drv, dev, next_state);
> >>>>> }
> >>>>>
> >>>>> +static void idle_call_stop_or_retain_tick(bool stop_tick)
> >>>>> +{
> >>>>> + if (stop_tick || tick_nohz_tick_stopped())
> >>>>> + tick_nohz_idle_stop_tick();
> >>>>> + else
> >>>>> + tick_nohz_idle_retain_tick();
> >>>>> +}
> >>>>> +
> >>>>> /**
> >>>>> * cpuidle_idle_call - the main idle function
> >>>>> *
> >>>>> @@ -170,7 +178,7 @@ static int call_cpuidle(struct cpuidle_d
> >>>>> * set, and it returns with polling set. If it ever stops polling, it
> >>>>> * must clear the polling bit.
> >>>>> */
> >>>>> -static void cpuidle_idle_call(void)
> >>>>> +static void cpuidle_idle_call(bool stop_tick)
> >>>>> {
> >>>>> struct cpuidle_device *dev = cpuidle_get_device();
> >>>>> struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
> >>>>> @@ -186,7 +194,7 @@ static void cpuidle_idle_call(void)
> >>>>> }
> >>>>>
> >>>>> if (cpuidle_not_available(drv, dev)) {
> >>>>> - tick_nohz_idle_stop_tick();
> >>>>> + idle_call_stop_or_retain_tick(stop_tick);
> >>>>>
> >>>>> default_idle_call();
> >>>>> goto exit_idle;
> >>>>> @@ -222,17 +230,19 @@ static void cpuidle_idle_call(void)
> >>>>> next_state = cpuidle_find_deepest_state(drv, dev, max_latency_ns);
> >>>>> call_cpuidle(drv, dev, next_state);
> >>>>> } else if (drv->state_count > 1) {
> >>>>> - bool stop_tick = true;
> >>>>> + /*
> >>>>> + * stop_tick is expected to be true by default by cpuidle
> >>>>> + * governors, which allows them to select idle states with
> >>>>> + * target residency above the tick period length.
> >>>>> + */
> >>>>> + stop_tick = true;
> >>>>>
> >>>>> /*
> >>>>> * Ask the cpuidle framework to choose a convenient idle state.
> >>>>> */
> >>>>> next_state = cpuidle_select(drv, dev, &stop_tick);
> >>>>>
> >>>>> - if (stop_tick || tick_nohz_tick_stopped())
> >>>>> - tick_nohz_idle_stop_tick();
> >>>>> - else
> >>>>> - tick_nohz_idle_retain_tick();
> >>>>> + idle_call_stop_or_retain_tick(stop_tick);
> >>>>>
> >>>>> entered_state = call_cpuidle(drv, dev, next_state);
> >>>>> /*
> >>>>> @@ -240,7 +250,7 @@ static void cpuidle_idle_call(void)
> >>>>> */
> >>>>> cpuidle_reflect(dev, entered_state);
> >>>>> } else {
> >>>>> - tick_nohz_idle_retain_tick();
> >>>>> + idle_call_stop_or_retain_tick(stop_tick);
> >>>>
> >>>> This would supersede e5c9ffc6ae1b ("cpuidle: Skip governor when only one idle state is available")
> >>>> so we should remove that code too.
> >>>
> >>> Which code? Do you mean the one that has been removed by
> >>>
> >>> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=d557640e4ce589a24dca5ca7ce3b9680f471325f
> >>>
> >> Ah of course, sorry!
> >
> > No worries.
> >
> >> Reviewed-by: Christian Loehle <christian.loehle@arm.com>
> >
> > So should I regard this as a fix for 7.0?
> >
> > I guess so because I don't think it would be useful to ship 7.0
> > without it only to change the behavior immediately in 7.1. And I
> > think that it can be treated as a fix for e5c9ffc6ae1b (above).
>
> Yes I think it being a fix for e5c9ffc6ae1b should be fine (fingers crossed).
OK
So I'm going to queue it up for the next -rc. If anyone has
objections, please let me know.
© 2016 - 2026 Red Hat, Inc.