[PATCH v1 1/4] cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency

Rafael J. Wysocki posted 1 patch 6 days, 7 hours ago
drivers/cpufreq/cpufreq-dt.c          |    2 +-
drivers/cpufreq/imx6q-cpufreq.c       |    2 +-
drivers/cpufreq/mediatek-cpufreq-hw.c |    2 +-
drivers/cpufreq/scmi-cpufreq.c        |    2 +-
drivers/cpufreq/scpi-cpufreq.c        |    2 +-
drivers/cpufreq/spear-cpufreq.c       |    2 +-
include/linux/cpufreq.h               |    3 +++
7 files changed, 9 insertions(+), 6 deletions(-)
[PATCH v1 1/4] cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency
Posted by Rafael J. Wysocki 6 days, 7 hours ago
From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>

Commit a755d0e2d41b ("cpufreq: Honour transition_latency over
transition_delay_us") caused platforms where cpuinfo.transition_latency
is CPUFREQ_ETERNAL to get a very large transition latency whereas
previously it had been capped at 10 ms (and later at 2 ms).

This led to a user-observable regression between 6.6 and 6.12 as
described by Shawn:

"The dbs sampling_rate was 10000 us on 6.6 and suddenly becomes
 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms
 because the default transition delay was dropped [...].

 It slows down dbs governor's reacting to CPU loading change
 dramatically.  Also, as transition_delay_us is used by schedutil
 governor as rate_limit_us, it shows a negative impact on device
 idle power consumption, because the device gets slightly less time
 in the lowest OPP."

Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as
cpuinfo.transition_latency was that it would be capped by the core,
but they may as well return a default transition latency value instead
of CPUFREQ_ETERNAL and the core need not do anything with it.

Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make
all of the drivers in question use it instead of CPUFREQ_ETERNAL.

Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us")
Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/
Reported-by: Shawn Guo <shawnguo@kernel.org>
Cc: 6.6+ <stable@vger.kernel.org> # 6.6+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt.c          |    2 +-
 drivers/cpufreq/imx6q-cpufreq.c       |    2 +-
 drivers/cpufreq/mediatek-cpufreq-hw.c |    2 +-
 drivers/cpufreq/scmi-cpufreq.c        |    2 +-
 drivers/cpufreq/scpi-cpufreq.c        |    2 +-
 drivers/cpufreq/spear-cpufreq.c       |    2 +-
 include/linux/cpufreq.h               |    3 +++
 7 files changed, 9 insertions(+), 6 deletions(-)

--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -104,7 +104,7 @@ static int cpufreq_init(struct cpufreq_p
 
 	transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
 	if (!transition_latency)
-		transition_latency = CPUFREQ_ETERNAL;
+		transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
 
 	cpumask_copy(policy->cpus, priv->cpus);
 	policy->driver_data = priv;
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -442,7 +442,7 @@ soc_opp_out:
 	}
 
 	if (of_property_read_u32(np, "clock-latency", &transition_latency))
-		transition_latency = CPUFREQ_ETERNAL;
+		transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
 
 	/*
 	 * Calculate the ramp time for max voltage change in the
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -309,7 +309,7 @@ static int mtk_cpufreq_hw_cpu_init(struc
 
 	latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000;
 	if (!latency)
-		latency = CPUFREQ_ETERNAL;
+		latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
 
 	policy->cpuinfo.transition_latency = latency;
 	policy->fast_switch_possible = true;
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -294,7 +294,7 @@ static int scmi_cpufreq_init(struct cpuf
 
 	latency = perf_ops->transition_latency_get(ph, domain);
 	if (!latency)
-		latency = CPUFREQ_ETERNAL;
+		latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
 
 	policy->cpuinfo.transition_latency = latency;
 
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -157,7 +157,7 @@ static int scpi_cpufreq_init(struct cpuf
 
 	latency = scpi_ops->get_transition_latency(cpu_dev);
 	if (!latency)
-		latency = CPUFREQ_ETERNAL;
+		latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
 
 	policy->cpuinfo.transition_latency = latency;
 
--- a/drivers/cpufreq/spear-cpufreq.c
+++ b/drivers/cpufreq/spear-cpufreq.c
@@ -182,7 +182,7 @@ static int spear_cpufreq_probe(struct pl
 
 	if (of_property_read_u32(np, "clock-latency",
 				&spear_cpufreq.transition_latency))
-		spear_cpufreq.transition_latency = CPUFREQ_ETERNAL;
+		spear_cpufreq.transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
 
 	cnt = of_property_count_u32_elems(np, "cpufreq_tbl");
 	if (cnt <= 0) {
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -32,6 +32,9 @@
  */
 
 #define CPUFREQ_ETERNAL			(-1)
+
+#define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS	NSEC_PER_MSEC
+
 #define CPUFREQ_NAME_LEN		16
 /* Print length for names. Extra 1 space for accommodating '\n' in prints */
 #define CPUFREQ_NAME_PLEN		(CPUFREQ_NAME_LEN + 1)
Re: [PATCH v1 1/4] cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency
Posted by Jie Zhan 5 days, 13 hours ago

On 9/25/2025 11:44 PM, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> Commit a755d0e2d41b ("cpufreq: Honour transition_latency over
> transition_delay_us") caused platforms where cpuinfo.transition_latency
> is CPUFREQ_ETERNAL to get a very large transition latency whereas
> previously it had been capped at 10 ms (and later at 2 ms).
> 
> This led to a user-observable regression between 6.6 and 6.12 as
> described by Shawn:
> 
> "The dbs sampling_rate was 10000 us on 6.6 and suddenly becomes
>  6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms
>  because the default transition delay was dropped [...].
> 
>  It slows down dbs governor's reacting to CPU loading change
>  dramatically.  Also, as transition_delay_us is used by schedutil
>  governor as rate_limit_us, it shows a negative impact on device
>  idle power consumption, because the device gets slightly less time
>  in the lowest OPP."
> 
> Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as
> cpuinfo.transition_latency was that it would be capped by the core,
> but they may as well return a default transition latency value instead
> of CPUFREQ_ETERNAL and the core need not do anything with it.
> 
> Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make
> all of the drivers in question use it instead of CPUFREQ_ETERNAL.
> 
> Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us")
> Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/
> Reported-by: Shawn Guo <shawnguo@kernel.org>
> Cc: 6.6+ <stable@vger.kernel.org> # 6.6+
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thanks, we've seen similar issues.

Reviewed-by: Jie Zhan <zhanjie9@hisilicon.com>
> ---
>  drivers/cpufreq/cpufreq-dt.c          |    2 +-
>  drivers/cpufreq/imx6q-cpufreq.c       |    2 +-
>  drivers/cpufreq/mediatek-cpufreq-hw.c |    2 +-
>  drivers/cpufreq/scmi-cpufreq.c        |    2 +-
>  drivers/cpufreq/scpi-cpufreq.c        |    2 +-
>  drivers/cpufreq/spear-cpufreq.c       |    2 +-
>  include/linux/cpufreq.h               |    3 +++
>  7 files changed, 9 insertions(+), 6 deletions(-)
Re: [PATCH v1 1/4] cpufreq: Make drivers using CPUFREQ_ETERNAL specify transition latency
Posted by Mario Limonciello 6 days, 6 hours ago

On 9/25/2025 10:44 AM, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
> 
> Commit a755d0e2d41b ("cpufreq: Honour transition_latency over
> transition_delay_us") caused platforms where cpuinfo.transition_latency
> is CPUFREQ_ETERNAL to get a very large transition latency whereas
> previously it had been capped at 10 ms (and later at 2 ms).
> 
> This led to a user-observable regression between 6.6 and 6.12 as
> described by Shawn:
> 
> "The dbs sampling_rate was 10000 us on 6.6 and suddenly becomes
>   6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms
>   because the default transition delay was dropped [...].
> 
>   It slows down dbs governor's reacting to CPU loading change
>   dramatically.  Also, as transition_delay_us is used by schedutil
>   governor as rate_limit_us, it shows a negative impact on device
>   idle power consumption, because the device gets slightly less time
>   in the lowest OPP."
> 
> Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as
> cpuinfo.transition_latency was that it would be capped by the core,
> but they may as well return a default transition latency value instead
> of CPUFREQ_ETERNAL and the core need not do anything with it.
> 
> Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make
> all of the drivers in question use it instead of CPUFREQ_ETERNAL.
> 
> Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us")
> Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/
> Reported-by: Shawn Guo <shawnguo@kernel.org>

Not 100% sure, but I think checkpatch gets pedantic about Closes 
followed by Reported-by and instead wants Reported-by followed by Closes.

> Cc: 6.6+ <stable@vger.kernel.org> # 6.6+
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>
> ---
>   drivers/cpufreq/cpufreq-dt.c          |    2 +-
>   drivers/cpufreq/imx6q-cpufreq.c       |    2 +-
>   drivers/cpufreq/mediatek-cpufreq-hw.c |    2 +-
>   drivers/cpufreq/scmi-cpufreq.c        |    2 +-
>   drivers/cpufreq/scpi-cpufreq.c        |    2 +-
>   drivers/cpufreq/spear-cpufreq.c       |    2 +-
>   include/linux/cpufreq.h               |    3 +++
>   7 files changed, 9 insertions(+), 6 deletions(-)
> 
> --- a/drivers/cpufreq/cpufreq-dt.c
> +++ b/drivers/cpufreq/cpufreq-dt.c
> @@ -104,7 +104,7 @@ static int cpufreq_init(struct cpufreq_p
>   
>   	transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
>   	if (!transition_latency)
> -		transition_latency = CPUFREQ_ETERNAL;
> +		transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>   
>   	cpumask_copy(policy->cpus, priv->cpus);
>   	policy->driver_data = priv;
> --- a/drivers/cpufreq/imx6q-cpufreq.c
> +++ b/drivers/cpufreq/imx6q-cpufreq.c
> @@ -442,7 +442,7 @@ soc_opp_out:
>   	}
>   
>   	if (of_property_read_u32(np, "clock-latency", &transition_latency))
> -		transition_latency = CPUFREQ_ETERNAL;
> +		transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>   
>   	/*
>   	 * Calculate the ramp time for max voltage change in the
> --- a/drivers/cpufreq/mediatek-cpufreq-hw.c
> +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
> @@ -309,7 +309,7 @@ static int mtk_cpufreq_hw_cpu_init(struc
>   
>   	latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000;
>   	if (!latency)
> -		latency = CPUFREQ_ETERNAL;
> +		latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>   
>   	policy->cpuinfo.transition_latency = latency;
>   	policy->fast_switch_possible = true;
> --- a/drivers/cpufreq/scmi-cpufreq.c
> +++ b/drivers/cpufreq/scmi-cpufreq.c
> @@ -294,7 +294,7 @@ static int scmi_cpufreq_init(struct cpuf
>   
>   	latency = perf_ops->transition_latency_get(ph, domain);
>   	if (!latency)
> -		latency = CPUFREQ_ETERNAL;
> +		latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>   
>   	policy->cpuinfo.transition_latency = latency;
>   
> --- a/drivers/cpufreq/scpi-cpufreq.c
> +++ b/drivers/cpufreq/scpi-cpufreq.c
> @@ -157,7 +157,7 @@ static int scpi_cpufreq_init(struct cpuf
>   
>   	latency = scpi_ops->get_transition_latency(cpu_dev);
>   	if (!latency)
> -		latency = CPUFREQ_ETERNAL;
> +		latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>   
>   	policy->cpuinfo.transition_latency = latency;
>   
> --- a/drivers/cpufreq/spear-cpufreq.c
> +++ b/drivers/cpufreq/spear-cpufreq.c
> @@ -182,7 +182,7 @@ static int spear_cpufreq_probe(struct pl
>   
>   	if (of_property_read_u32(np, "clock-latency",
>   				&spear_cpufreq.transition_latency))
> -		spear_cpufreq.transition_latency = CPUFREQ_ETERNAL;
> +		spear_cpufreq.transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>   
>   	cnt = of_property_count_u32_elems(np, "cpufreq_tbl");
>   	if (cnt <= 0) {
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -32,6 +32,9 @@
>    */
>   
>   #define CPUFREQ_ETERNAL			(-1)
> +
> +#define CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS	NSEC_PER_MSEC
> +
>   #define CPUFREQ_NAME_LEN		16
>   /* Print length for names. Extra 1 space for accommodating '\n' in prints */
>   #define CPUFREQ_NAME_PLEN		(CPUFREQ_NAME_LEN + 1)
> 
> 
>