From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Commit a755d0e2d41b ("cpufreq: Honour transition_latency over
transition_delay_us") caused platforms where cpuinfo.transition_latency
is CPUFREQ_ETERNAL to get a very large transition latency whereas
previously it had been capped at 10 ms (and later at 2 ms).
This led to a user-observable regression between 6.6 and 6.12 as
described by Shawn:
"The dbs sampling_rate was 10000 us on 6.6 and suddently becomes
6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms
because the default transition delay was dropped [...].
It slows down dbs governor's reacting to CPU loading change
dramatically. Also, as transition_delay_us is used by schedutil
governor as rate_limit_us, it shows a negative impact on device
idle power consumption, because the device gets slightly less time
in the lowest OPP."
Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as
cpuinfo.transition_latency was that it would be capped by the core,
but they may as well return a default transition latency value instead
of CPUFREQ_ETERNAL and the core need not do anything with it.
Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make
all of the drivers in question use it instead of CPUFREQ_ETERNAL.
Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us")
Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/
Reported-by: Shawn Guo <shawnguo@kernel.org>
Cc: 6.6+ <stable@vger.kernel.org> # 6.6+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
drivers/cpufreq/cpufreq-dt.c | 2 +-
drivers/cpufreq/imx6q-cpufreq.c | 2 +-
drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +-
drivers/cpufreq/scmi-cpufreq.c | 2 +-
drivers/cpufreq/scpi-cpufreq.c | 2 +-
drivers/cpufreq/spear-cpufreq.c | 2 +-
include/linux/cpufreq.h | 3 +++
7 files changed, 9 insertions(+), 6 deletions(-)
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -104,7 +104,7 @@ static int cpufreq_init(struct cpufreq_p
transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
if (!transition_latency)
- transition_latency = CPUFREQ_ETERNAL;
+ transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
cpumask_copy(policy->cpus, priv->cpus);
policy->driver_data = priv;
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -442,7 +442,7 @@ soc_opp_out:
}
if (of_property_read_u32(np, "clock-latency", &transition_latency))
- transition_latency = CPUFREQ_ETERNAL;
+ transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
/*
* Calculate the ramp time for max voltage change in the
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -309,7 +309,7 @@ static int mtk_cpufreq_hw_cpu_init(struc
latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000;
if (!latency)
- latency = CPUFREQ_ETERNAL;
+ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
policy->cpuinfo.transition_latency = latency;
policy->fast_switch_possible = true;
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -294,7 +294,7 @@ static int scmi_cpufreq_init(struct cpuf
latency = perf_ops->transition_latency_get(ph, domain);
if (!latency)
- latency = CPUFREQ_ETERNAL;
+ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
policy->cpuinfo.transition_latency = latency;
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -157,7 +157,7 @@ static int scpi_cpufreq_init(struct cpuf
latency = scpi_ops->get_transition_latency(cpu_dev);
if (!latency)
- latency = CPUFREQ_ETERNAL;
+ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
policy->cpuinfo.transition_latency = latency;
--- a/drivers/cpufreq/spear-cpufreq.c
+++ b/drivers/cpufreq/spear-cpufreq.c
@@ -182,7 +182,7 @@ static int spear_cpufreq_probe(struct pl
if (of_property_read_u32(np, "clock-latency",
&spear_cpufreq.transition_latency))
- spear_cpufreq.transition_latency = CPUFREQ_ETERNAL;
+ spear_cpufreq.transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
cnt = of_property_count_u32_elems(np, "cpufreq_tbl");
if (cnt <= 0) {
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -32,6 +32,9 @@
*/
#define CPUFREQ_ETERNAL (-1)
+
+#define CPUFREQ_DEFAULT_TANSITION_LATENCY_NS NSEC_PER_MSEC
+
#define CPUFREQ_NAME_LEN 16
/* Print length for names. Extra 1 space for accommodating '\n' in prints */
#define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1)
On 9/25/2025 11:44 PM, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> Commit a755d0e2d41b ("cpufreq: Honour transition_latency over
> transition_delay_us") caused platforms where cpuinfo.transition_latency
> is CPUFREQ_ETERNAL to get a very large transition latency whereas
> previously it had been capped at 10 ms (and later at 2 ms).
>
> This led to a user-observable regression between 6.6 and 6.12 as
> described by Shawn:
>
> "The dbs sampling_rate was 10000 us on 6.6 and suddently becomes
> 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms
> because the default transition delay was dropped [...].
>
> It slows down dbs governor's reacting to CPU loading change
> dramatically. Also, as transition_delay_us is used by schedutil
> governor as rate_limit_us, it shows a negative impact on device
> idle power consumption, because the device gets slightly less time
> in the lowest OPP."
>
> Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as
> cpuinfo.transition_latency was that it would be capped by the core,
> but they may as well return a default transition latency value instead
> of CPUFREQ_ETERNAL and the core need not do anything with it.
>
> Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make
> all of the drivers in question use it instead of CPUFREQ_ETERNAL.
>
> Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us")
> Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/
> Reported-by: Shawn Guo <shawnguo@kernel.org>
> Cc: 6.6+ <stable@vger.kernel.org> # 6.6+
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Thanks, we've seen similar issues.
Reviewed-by: Jie Zhan <zhanjie9@hisilicon.com>
> ---
> drivers/cpufreq/cpufreq-dt.c | 2 +-
> drivers/cpufreq/imx6q-cpufreq.c | 2 +-
> drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +-
> drivers/cpufreq/scmi-cpufreq.c | 2 +-
> drivers/cpufreq/scpi-cpufreq.c | 2 +-
> drivers/cpufreq/spear-cpufreq.c | 2 +-
> include/linux/cpufreq.h | 3 +++
> 7 files changed, 9 insertions(+), 6 deletions(-)
On 9/25/2025 10:44 AM, Rafael J. Wysocki wrote:
> From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
>
> Commit a755d0e2d41b ("cpufreq: Honour transition_latency over
> transition_delay_us") caused platforms where cpuinfo.transition_latency
> is CPUFREQ_ETERNAL to get a very large transition latency whereas
> previously it had been capped at 10 ms (and later at 2 ms).
>
> This led to a user-observable regression between 6.6 and 6.12 as
> described by Shawn:
>
> "The dbs sampling_rate was 10000 us on 6.6 and suddently becomes
> 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these platforms
> because the default transition delay was dropped [...].
>
> It slows down dbs governor's reacting to CPU loading change
> dramatically. Also, as transition_delay_us is used by schedutil
> governor as rate_limit_us, it shows a negative impact on device
> idle power consumption, because the device gets slightly less time
> in the lowest OPP."
>
> Evidently, the expectation of the drivers using CPUFREQ_ETERNAL as
> cpuinfo.transition_latency was that it would be capped by the core,
> but they may as well return a default transition latency value instead
> of CPUFREQ_ETERNAL and the core need not do anything with it.
>
> Accordingly, introduce CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS and make
> all of the drivers in question use it instead of CPUFREQ_ETERNAL.
>
> Fixes: a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us")
> Closes: https://lore.kernel.org/linux-pm/20250922125929.453444-1-shawnguo2@yeah.net/
> Reported-by: Shawn Guo <shawnguo@kernel.org>
Not 100% sure, but I think checkpatch gets pedantic about Closes
followed by Reported-by and instead wants Reported-by followed by Closes.
> Cc: 6.6+ <stable@vger.kernel.org> # 6.6+
> Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Mario Limonciello (AMD) <superm1@kernel.org>> ---
> drivers/cpufreq/cpufreq-dt.c | 2 +-
> drivers/cpufreq/imx6q-cpufreq.c | 2 +-
> drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +-
> drivers/cpufreq/scmi-cpufreq.c | 2 +-
> drivers/cpufreq/scpi-cpufreq.c | 2 +-
> drivers/cpufreq/spear-cpufreq.c | 2 +-
> include/linux/cpufreq.h | 3 +++
> 7 files changed, 9 insertions(+), 6 deletions(-)
>
> --- a/drivers/cpufreq/cpufreq-dt.c
> +++ b/drivers/cpufreq/cpufreq-dt.c
> @@ -104,7 +104,7 @@ static int cpufreq_init(struct cpufreq_p
>
> transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
> if (!transition_latency)
> - transition_latency = CPUFREQ_ETERNAL;
> + transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>
> cpumask_copy(policy->cpus, priv->cpus);
> policy->driver_data = priv;
> --- a/drivers/cpufreq/imx6q-cpufreq.c
> +++ b/drivers/cpufreq/imx6q-cpufreq.c
> @@ -442,7 +442,7 @@ soc_opp_out:
> }
>
> if (of_property_read_u32(np, "clock-latency", &transition_latency))
> - transition_latency = CPUFREQ_ETERNAL;
> + transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>
> /*
> * Calculate the ramp time for max voltage change in the
> --- a/drivers/cpufreq/mediatek-cpufreq-hw.c
> +++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
> @@ -309,7 +309,7 @@ static int mtk_cpufreq_hw_cpu_init(struc
>
> latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000;
> if (!latency)
> - latency = CPUFREQ_ETERNAL;
> + latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>
> policy->cpuinfo.transition_latency = latency;
> policy->fast_switch_possible = true;
> --- a/drivers/cpufreq/scmi-cpufreq.c
> +++ b/drivers/cpufreq/scmi-cpufreq.c
> @@ -294,7 +294,7 @@ static int scmi_cpufreq_init(struct cpuf
>
> latency = perf_ops->transition_latency_get(ph, domain);
> if (!latency)
> - latency = CPUFREQ_ETERNAL;
> + latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>
> policy->cpuinfo.transition_latency = latency;
>
> --- a/drivers/cpufreq/scpi-cpufreq.c
> +++ b/drivers/cpufreq/scpi-cpufreq.c
> @@ -157,7 +157,7 @@ static int scpi_cpufreq_init(struct cpuf
>
> latency = scpi_ops->get_transition_latency(cpu_dev);
> if (!latency)
> - latency = CPUFREQ_ETERNAL;
> + latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>
> policy->cpuinfo.transition_latency = latency;
>
> --- a/drivers/cpufreq/spear-cpufreq.c
> +++ b/drivers/cpufreq/spear-cpufreq.c
> @@ -182,7 +182,7 @@ static int spear_cpufreq_probe(struct pl
>
> if (of_property_read_u32(np, "clock-latency",
> &spear_cpufreq.transition_latency))
> - spear_cpufreq.transition_latency = CPUFREQ_ETERNAL;
> + spear_cpufreq.transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
>
> cnt = of_property_count_u32_elems(np, "cpufreq_tbl");
> if (cnt <= 0) {
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -32,6 +32,9 @@
> */
>
> #define CPUFREQ_ETERNAL (-1)
> +
> +#define CPUFREQ_DEFAULT_TANSITION_LATENCY_NS NSEC_PER_MSEC
> +
> #define CPUFREQ_NAME_LEN 16
> /* Print length for names. Extra 1 space for accommodating '\n' in prints */
> #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1)
>
>
>
© 2016 - 2025 Red Hat, Inc.