[PATCH v1 4/4] cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold

Rafael J. Wysocki posted 1 patch 2 months, 3 weeks ago
drivers/cpuidle/governors/teo.c |   20 +++++++++++++++-----
1 file changed, 15 insertions(+), 5 deletions(-)
[PATCH v1 4/4] cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold
Posted by Rafael J. Wysocki 2 months, 3 weeks ago
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

If a given governor metric falls below a certain value (8 for
DECAY_SHIFT equal to 3), the amount subtracted from it on each
update (metric >> DECAY_SHIFT) becomes 0, so the metric will not
decay any more due to the simplistic decay implementation.  This
may in some cases lead to subtle inconsistencies in the governor
behavior, so change the decay implementation to take this into
account and set the metric at hand to 0 in that case.

Suggested-by: Christian Loehle <christian.loehle@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/teo.c |   20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -148,6 +148,16 @@ struct teo_cpu {
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 
+static void teo_decay(unsigned int *metric)
+{
+	unsigned int delta = *metric >> DECAY_SHIFT;
+
+	if (delta)
+		*metric -= delta;
+	else
+		*metric = 0;
+}
+
 /**
  * teo_update - Update CPU metrics after wakeup.
  * @drv: cpuidle driver containing state data.
@@ -159,7 +169,7 @@ static void teo_update(struct cpuidle_dr
 	int i, idx_timer = 0, idx_duration = 0;
 	s64 target_residency_ns, measured_ns;
 
-	cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
+	teo_decay(&cpu_data->short_idles);
 
 	if (cpu_data->artificial_wakeup) {
 		/*
@@ -195,8 +205,8 @@ static void teo_update(struct cpuidle_dr
 	for (i = 0; i < drv->state_count; i++) {
 		struct teo_bin *bin = &cpu_data->state_bins[i];
 
-		bin->hits -= bin->hits >> DECAY_SHIFT;
-		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
+		teo_decay(&bin->hits);
+		teo_decay(&bin->intercepts);
 
 		target_residency_ns = drv->states[i].target_residency_ns;
 
@@ -207,7 +217,7 @@ static void teo_update(struct cpuidle_dr
 		}
 	}
 
-	cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
+	teo_decay(&cpu_data->tick_intercepts);
 	/*
 	 * If the measured idle duration falls into the same bin as the sleep
 	 * length, this is a "hit", so update the "hits" metric for that bin.
@@ -222,7 +232,7 @@ static void teo_update(struct cpuidle_dr
 			cpu_data->tick_intercepts += PULSE;
 	}
 
-	cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
+	teo_decay(&cpu_data->total);
 	cpu_data->total += PULSE;
 }
Re: [PATCH v1 4/4] cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold
Posted by Christian Loehle 2 months, 3 weeks ago
On 11/12/25 16:25, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> If a given governor metric falls below a certain value (8 for
> DECAY_SHIFT equal to 3), it will not decay any more due to the
> simplistic decay implementation.  This may in some cases lead to
> subtle inconsistencies in the governor behavior, so change the
> decay implementation to take it into account and set the metric
> at hand to 0 in that case.
> 
> Suggested-by: Christian Loehle <christian.loehle@arm.com>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> ---
>  drivers/cpuidle/governors/teo.c |   20 +++++++++++++++-----
>  1 file changed, 15 insertions(+), 5 deletions(-)
> 
> --- a/drivers/cpuidle/governors/teo.c
> +++ b/drivers/cpuidle/governors/teo.c
> @@ -148,6 +148,16 @@ struct teo_cpu {
>  
>  static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
>  
> +static void teo_decay(unsigned int *metric)
> +{
> +	unsigned int delta = *metric >> DECAY_SHIFT;
> +
> +	if (delta)
> +		*metric -= delta;
> +	else
> +		*metric = 0;
> +}
> +
>  /**
>   * teo_update - Update CPU metrics after wakeup.
>   * @drv: cpuidle driver containing state data.
> @@ -159,7 +169,7 @@ static void teo_update(struct cpuidle_dr
>  	int i, idx_timer = 0, idx_duration = 0;
>  	s64 target_residency_ns, measured_ns;
>  
> -	cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
> +	teo_decay(&cpu_data->short_idles);
>  
>  	if (cpu_data->artificial_wakeup) {
>  		/*
> @@ -195,8 +205,8 @@ static void teo_update(struct cpuidle_dr
>  	for (i = 0; i < drv->state_count; i++) {
>  		struct teo_bin *bin = &cpu_data->state_bins[i];
>  
> -		bin->hits -= bin->hits >> DECAY_SHIFT;
> -		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
> +		teo_decay(&bin->hits);
> +		teo_decay(&bin->intercepts);
>  
>  		target_residency_ns = drv->states[i].target_residency_ns;
>  
> @@ -207,7 +217,7 @@ static void teo_update(struct cpuidle_dr
>  		}
>  	}
>  
> -	cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
> +	teo_decay(&cpu_data->tick_intercepts);
>  	/*
>  	 * If the measured idle duration falls into the same bin as the sleep
>  	 * length, this is a "hit", so update the "hits" metric for that bin.
> @@ -222,7 +232,7 @@ static void teo_update(struct cpuidle_dr
>  			cpu_data->tick_intercepts += PULSE;
>  	}
>  
> -	cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
> +	teo_decay(&cpu_data->total);
>  	cpu_data->total += PULSE;

This will result in total no longer being a strict sum of the bins.
Any reason not to do something like:

-----8<-----

diff --git a/drivers/cpuidle/governors/teo.c b/drivers/cpuidle/governors/teo.c
index e5b795cf3155..ff58d70ee80d 100644
--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -148,14 +148,19 @@ struct teo_cpu {
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 
-static void teo_decay(unsigned int *metric)
+static unsigned int teo_decay(unsigned int *metric)
 {
        unsigned int delta = *metric >> DECAY_SHIFT;
+       unsigned int decay;
 
-       if (delta)
+       if (delta) {
                *metric -= delta;
-       else
-               *metric = 0;
+               return delta;
+       }
+
+       decay = *metric;
+       *metric = 0;
+       return decay;
 }
 
 /**
@@ -168,6 +173,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
        struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
        int i, idx_timer = 0, idx_duration = 0;
        s64 target_residency_ns, measured_ns;
+       unsigned int total_decay = 0;
 
        teo_decay(&cpu_data->short_idles);
 
@@ -205,8 +211,8 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
        for (i = 0; i < drv->state_count; i++) {
                struct teo_bin *bin = &cpu_data->state_bins[i];
 
-               teo_decay(&bin->hits);
-               teo_decay(&bin->intercepts);
+               total_decay += teo_decay(&bin->hits);
+               total_decay += teo_decay(&bin->intercepts);
 
                target_residency_ns = drv->states[i].target_residency_ns;
 
@@ -232,7 +238,7 @@ static void teo_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
                        cpu_data->tick_intercepts += PULSE;
        }
 
-       teo_decay(&cpu_data->total);
+       cpu_data->total -= total_decay;
        cpu_data->total += PULSE;
 }
Re: [PATCH v1 4/4] cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold
Posted by Rafael J. Wysocki 2 months, 3 weeks ago
On Wed, Nov 12, 2025 at 6:29 PM Christian Loehle
<christian.loehle@arm.com> wrote:
>
> On 11/12/25 16:25, Rafael J. Wysocki wrote:
> > From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> >
> > If a given governor metric falls below a certain value (8 for
> > DECAY_SHIFT equal to 3), it will not decay any more due to the
> > simplistic decay implementation.  This may in some cases lead to
> > subtle inconsistencies in the governor behavior, so change the
> > decay implementation to take it into account and set the metric
> > at hand to 0 in that case.
> >
> > Suggested-by: Christian Loehle <christian.loehle@arm.com>
> > Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> > ---
> >  drivers/cpuidle/governors/teo.c |   20 +++++++++++++++-----
> >  1 file changed, 15 insertions(+), 5 deletions(-)
> >
> > --- a/drivers/cpuidle/governors/teo.c
> > +++ b/drivers/cpuidle/governors/teo.c
> > @@ -148,6 +148,16 @@ struct teo_cpu {
> >
> >  static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
> >
> > +static void teo_decay(unsigned int *metric)
> > +{
> > +     unsigned int delta = *metric >> DECAY_SHIFT;
> > +
> > +     if (delta)
> > +             *metric -= delta;
> > +     else
> > +             *metric = 0;
> > +}
> > +
> >  /**
> >   * teo_update - Update CPU metrics after wakeup.
> >   * @drv: cpuidle driver containing state data.
> > @@ -159,7 +169,7 @@ static void teo_update(struct cpuidle_dr
> >       int i, idx_timer = 0, idx_duration = 0;
> >       s64 target_residency_ns, measured_ns;
> >
> > -     cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
> > +     teo_decay(&cpu_data->short_idles);
> >
> >       if (cpu_data->artificial_wakeup) {
> >               /*
> > @@ -195,8 +205,8 @@ static void teo_update(struct cpuidle_dr
> >       for (i = 0; i < drv->state_count; i++) {
> >               struct teo_bin *bin = &cpu_data->state_bins[i];
> >
> > -             bin->hits -= bin->hits >> DECAY_SHIFT;
> > -             bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
> > +             teo_decay(&bin->hits);
> > +             teo_decay(&bin->intercepts);
> >
> >               target_residency_ns = drv->states[i].target_residency_ns;
> >
> > @@ -207,7 +217,7 @@ static void teo_update(struct cpuidle_dr
> >               }
> >       }
> >
> > -     cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
> > +     teo_decay(&cpu_data->tick_intercepts);
> >       /*
> >        * If the measured idle duration falls into the same bin as the sleep
> >        * length, this is a "hit", so update the "hits" metric for that bin.
> > @@ -222,7 +232,7 @@ static void teo_update(struct cpuidle_dr
> >                       cpu_data->tick_intercepts += PULSE;
> >       }
> >
> > -     cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
> > +     teo_decay(&cpu_data->total);
> >       cpu_data->total += PULSE;
>
> This will result in total no longer being a strict sum of the bins.

Ah, good point.

> Any reason not to do something like:

Well, it would be more straightforward to just compute "total" from
scratch instead of using total_decay (it would be the same amount of
computation minus the teo_decay() changes AFAICS).

I'll send an update of this patch.
Re: [PATCH v1 4/4] cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold
Posted by Christian Loehle 2 months, 3 weeks ago
On 11/12/25 17:51, Rafael J. Wysocki wrote:
> On Wed, Nov 12, 2025 at 6:29 PM Christian Loehle
> <christian.loehle@arm.com> wrote:
>>
>> On 11/12/25 16:25, Rafael J. Wysocki wrote:
>>> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>>
>>> If a given governor metric falls below a certain value (8 for
>>> DECAY_SHIFT equal to 3), it will not decay any more due to the
>>> simplistic decay implementation.  This may in some cases lead to
>>> subtle inconsistencies in the governor behavior, so change the
>>> decay implementation to take it into account and set the metric
>>> at hand to 0 in that case.
>>>
>>> Suggested-by: Christian Loehle <christian.loehle@arm.com>
>>> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>>> ---
>>>  drivers/cpuidle/governors/teo.c |   20 +++++++++++++++-----
>>>  1 file changed, 15 insertions(+), 5 deletions(-)
>>>
>>> --- a/drivers/cpuidle/governors/teo.c
>>> +++ b/drivers/cpuidle/governors/teo.c
>>> @@ -148,6 +148,16 @@ struct teo_cpu {
>>>
>>>  static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
>>>
>>> +static void teo_decay(unsigned int *metric)
>>> +{
>>> +     unsigned int delta = *metric >> DECAY_SHIFT;
>>> +
>>> +     if (delta)
>>> +             *metric -= delta;
>>> +     else
>>> +             *metric = 0;
>>> +}
>>> +
>>>  /**
>>>   * teo_update - Update CPU metrics after wakeup.
>>>   * @drv: cpuidle driver containing state data.
>>> @@ -159,7 +169,7 @@ static void teo_update(struct cpuidle_dr
>>>       int i, idx_timer = 0, idx_duration = 0;
>>>       s64 target_residency_ns, measured_ns;
>>>
>>> -     cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
>>> +     teo_decay(&cpu_data->short_idles);
>>>
>>>       if (cpu_data->artificial_wakeup) {
>>>               /*
>>> @@ -195,8 +205,8 @@ static void teo_update(struct cpuidle_dr
>>>       for (i = 0; i < drv->state_count; i++) {
>>>               struct teo_bin *bin = &cpu_data->state_bins[i];
>>>
>>> -             bin->hits -= bin->hits >> DECAY_SHIFT;
>>> -             bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
>>> +             teo_decay(&bin->hits);
>>> +             teo_decay(&bin->intercepts);
>>>
>>>               target_residency_ns = drv->states[i].target_residency_ns;
>>>
>>> @@ -207,7 +217,7 @@ static void teo_update(struct cpuidle_dr
>>>               }
>>>       }
>>>
>>> -     cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
>>> +     teo_decay(&cpu_data->tick_intercepts);
>>>       /*
>>>        * If the measured idle duration falls into the same bin as the sleep
>>>        * length, this is a "hit", so update the "hits" metric for that bin.
>>> @@ -222,7 +232,7 @@ static void teo_update(struct cpuidle_dr
>>>                       cpu_data->tick_intercepts += PULSE;
>>>       }
>>>
>>> -     cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
>>> +     teo_decay(&cpu_data->total);
>>>       cpu_data->total += PULSE;
>>
>> This will result in total no longer being a strict sum of the bins.
> 
> Ah, good point.
> 
>> Any reason not to do something like:
> 
> Well, it would be more straightforward to just compute "total" from
> scratch instead of using total_decay (it would be the same amount of
> computation minus the teo_decay() changes AFAICS).

Duh, of course...

> 
> I'll send an update of this patch.

Thanks!
[PATCH v2 4/4] cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold
Posted by Rafael J. Wysocki 2 months, 3 weeks ago
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

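If a given governor metric falls below a certain value (8 for
DECAY_SHIFT equal to 3), the amount subtracted from it on each
update (metric >> DECAY_SHIFT) becomes 0, so the metric will not
decay any more due to the simplistic decay implementation.  This
may in some cases lead to subtle inconsistencies in the governor
behavior, so change the decay implementation to take this into
account and set the metric at hand to 0 in that case.

Since the "hits" and "intercepts" metrics of individual bins may now
be set to 0 independently of cpu_data->total, stop decaying the
latter separately and compute it from scratch on every update as
the sum of the decayed "hits" and "intercepts" metrics for all of
the idle states plus PULSE, so it always remains equal to the sum
of the bin metrics.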

Suggested-by: Christian Loehle <christian.loehle@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---

v1 -> v2:
   * Ensure that cpu_data->total is always the sum of the intercepts and hits
     metrics for all of the idle states (Christian).

---
 drivers/cpuidle/governors/teo.c |   26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

--- a/drivers/cpuidle/governors/teo.c
+++ b/drivers/cpuidle/governors/teo.c
@@ -148,6 +148,16 @@ struct teo_cpu {
 
 static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
 
+static void teo_decay(unsigned int *metric)
+{
+	unsigned int delta = *metric >> DECAY_SHIFT;
+
+	if (delta)
+		*metric -= delta;
+	else
+		*metric = 0;
+}
+
 /**
  * teo_update - Update CPU metrics after wakeup.
  * @drv: cpuidle driver containing state data.
@@ -158,8 +168,9 @@ static void teo_update(struct cpuidle_dr
 	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
 	int i, idx_timer = 0, idx_duration = 0;
 	s64 target_residency_ns, measured_ns;
+	unsigned int total = 0;
 
-	cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
+	teo_decay(&cpu_data->short_idles);
 
 	if (cpu_data->artificial_wakeup) {
 		/*
@@ -195,8 +206,10 @@ static void teo_update(struct cpuidle_dr
 	for (i = 0; i < drv->state_count; i++) {
 		struct teo_bin *bin = &cpu_data->state_bins[i];
 
-		bin->hits -= bin->hits >> DECAY_SHIFT;
-		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
+		teo_decay(&bin->hits);
+		total += bin->hits;
+		teo_decay(&bin->intercepts);
+		total += bin->intercepts;
 
 		target_residency_ns = drv->states[i].target_residency_ns;
 
@@ -207,7 +220,9 @@ static void teo_update(struct cpuidle_dr
 		}
 	}
 
-	cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
+	cpu_data->total = total + PULSE;
+
+	teo_decay(&cpu_data->tick_intercepts);
 	/*
 	 * If the measured idle duration falls into the same bin as the sleep
 	 * length, this is a "hit", so update the "hits" metric for that bin.
@@ -221,9 +236,6 @@ static void teo_update(struct cpuidle_dr
 		if (TICK_NSEC <= measured_ns)
 			cpu_data->tick_intercepts += PULSE;
 	}
-
-	cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
-	cpu_data->total += PULSE;
 }
 
 static bool teo_state_ok(int i, struct cpuidle_driver *drv)
Re: [PATCH v2 4/4] cpuidle: governors: teo: Decay metrics below DECAY_SHIFT threshold
Posted by Christian Loehle 2 months, 3 weeks ago
On 11/12/25 18:03, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> If a given governor metric falls below a certain value (8 for
> DECAY_SHIFT equal to 3), it will not decay any more due to the
> simplistic decay implementation.  This may in some cases lead to
> subtle inconsistencies in the governor behavior, so change the
> decay implementation to take it into account and set the metric
> at hand to 0 in that case.
> 
> Suggested-by: Christian Loehle <christian.loehle@arm.com>
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Reviewed-by: Christian Loehle <christian.loehle@arm.com>

> ---
> 
> v1 -> v2:
>    * Ensure that cpu_data->total is always the sum of the intercepts and hits
>      metrics for all of the idle states (Christian).
> 
> ---
>  drivers/cpuidle/governors/teo.c |   26 +++++++++++++++++++-------
>  1 file changed, 19 insertions(+), 7 deletions(-)
> 
> --- a/drivers/cpuidle/governors/teo.c
> +++ b/drivers/cpuidle/governors/teo.c
> @@ -148,6 +148,16 @@ struct teo_cpu {
>  
>  static DEFINE_PER_CPU(struct teo_cpu, teo_cpus);
>  
> +static void teo_decay(unsigned int *metric)
> +{
> +	unsigned int delta = *metric >> DECAY_SHIFT;
> +
> +	if (delta)
> +		*metric -= delta;
> +	else
> +		*metric = 0;
> +}
> +
>  /**
>   * teo_update - Update CPU metrics after wakeup.
>   * @drv: cpuidle driver containing state data.
> @@ -158,8 +168,9 @@ static void teo_update(struct cpuidle_dr
>  	struct teo_cpu *cpu_data = per_cpu_ptr(&teo_cpus, dev->cpu);
>  	int i, idx_timer = 0, idx_duration = 0;
>  	s64 target_residency_ns, measured_ns;
> +	unsigned int total = 0;
>  
> -	cpu_data->short_idles -= cpu_data->short_idles >> DECAY_SHIFT;
> +	teo_decay(&cpu_data->short_idles);
>  
>  	if (cpu_data->artificial_wakeup) {
>  		/*
> @@ -195,8 +206,10 @@ static void teo_update(struct cpuidle_dr
>  	for (i = 0; i < drv->state_count; i++) {
>  		struct teo_bin *bin = &cpu_data->state_bins[i];
>  
> -		bin->hits -= bin->hits >> DECAY_SHIFT;
> -		bin->intercepts -= bin->intercepts >> DECAY_SHIFT;
> +		teo_decay(&bin->hits);
> +		total += bin->hits;
> +		teo_decay(&bin->intercepts);
> +		total += bin->intercepts;
>  
>  		target_residency_ns = drv->states[i].target_residency_ns;
>  
> @@ -207,7 +220,9 @@ static void teo_update(struct cpuidle_dr
>  		}
>  	}
>  
> -	cpu_data->tick_intercepts -= cpu_data->tick_intercepts >> DECAY_SHIFT;
> +	cpu_data->total = total + PULSE;
> +
> +	teo_decay(&cpu_data->tick_intercepts);
>  	/*
>  	 * If the measured idle duration falls into the same bin as the sleep
>  	 * length, this is a "hit", so update the "hits" metric for that bin.
> @@ -221,9 +236,6 @@ static void teo_update(struct cpuidle_dr
>  		if (TICK_NSEC <= measured_ns)
>  			cpu_data->tick_intercepts += PULSE;
>  	}
> -
> -	cpu_data->total -= cpu_data->total >> DECAY_SHIFT;
> -	cpu_data->total += PULSE;
>  }
>  
>  static bool teo_state_ok(int i, struct cpuidle_driver *drv)
> 
> 
>