drivers/cpufreq/cpufreq.c | 3 +++ include/linux/cpufreq.h | 2 ++ 2 files changed, 5 insertions(+)
From: Shawn Guo <shawnguo@kernel.org>
A regression is seen with 6.6 -> 6.12 kernel upgrade on platforms where
cpufreq-dt driver sets cpuinfo.transition_latency as CPUFREQ_ETERNAL (-1),
due to that platform's DT doesn't provide the optional property
'clock-latency-ns'. The dbs sampling_rate was 10000 us on 6.6 and
suddently becomes 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these
platforms, because the default transition delay was dropped by the commits
below.
commit 37c6dccd6837 ("cpufreq: Remove LATENCY_MULTIPLIER")
commit a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us")
commit e13aa799c2a6 ("cpufreq: Change default transition delay to 2ms")
It slows down dbs governor's reacting to CPU loading change
dramatically. Also, as transition_delay_us is used by schedutil governor
as rate_limit_us, it shows a negative impact on device idle power
consumption, because the device gets slightly less time in the lowest OPP.
Fix the regressions by defining a default transition latency for
handling the case of CPUFREQ_ETERNAL.
Cc: stable@vger.kernel.org
Fixes: 37c6dccd6837 ("cpufreq: Remove LATENCY_MULTIPLIER")
Signed-off-by: Shawn Guo <shawnguo@kernel.org>
---
Changes for v2:
- Follow Rafael's suggestion to define a default transition latency for
handling CPUFREQ_ETERNAL, and pave the way to get rid of
CPUFREQ_ETERNAL completely later.
v1: https://lkml.org/lkml/2025/9/10/294
drivers/cpufreq/cpufreq.c | 3 +++
include/linux/cpufreq.h | 2 ++
2 files changed, 5 insertions(+)
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index fc7eace8b65b..c69d10f0e8ec 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -549,6 +549,9 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy)
if (policy->transition_delay_us)
return policy->transition_delay_us;
+ if (policy->cpuinfo.transition_latency == CPUFREQ_ETERNAL)
+ policy->cpuinfo.transition_latency = CPUFREQ_DEFAULT_TANSITION_LATENCY_NS;
+
latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
if (latency)
/* Give a 50% breathing room between updates */
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 95f3807c8c55..935e9a660039 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -36,6 +36,8 @@
/* Print length for names. Extra 1 space for accommodating '\n' in prints */
#define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1)
+#define CPUFREQ_DEFAULT_TANSITION_LATENCY_NS NSEC_PER_MSEC
+
struct cpufreq_governor;
enum cpufreq_table_sorting {
--
2.43.0
On 9/22/2025 8:59 PM, Shawn Guo wrote: > From: Shawn Guo <shawnguo@kernel.org> > > A regression is seen with 6.6 -> 6.12 kernel upgrade on platforms where > cpufreq-dt driver sets cpuinfo.transition_latency as CPUFREQ_ETERNAL (-1), > due to that platform's DT doesn't provide the optional property > 'clock-latency-ns'. The dbs sampling_rate was 10000 us on 6.6 and > suddently becomes 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these > platforms, because the default transition delay was dropped by the commits > below. > > commit 37c6dccd6837 ("cpufreq: Remove LATENCY_MULTIPLIER") > commit a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") Hello Shawn, Reported by checkpatch.pl: WARNING: Prefer a maximum 75 chars per line (possible unwrapped commit description?) #12: commit a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us") > commit e13aa799c2a6 ("cpufreq: Change default transition delay to 2ms") > > It slows down dbs governor's reacting to CPU loading change > dramatically. Also, as transition_delay_us is used by schedutil governor > as rate_limit_us, it shows a negative impact on device idle power > consumption, because the device gets slightly less time in the lowest OPP. > > Fix the regressions by defining a default transition latency for > handling the case of CPUFREQ_ETERNAL. > > Cc: stable@vger.kernel.org > Fixes: 37c6dccd6837 ("cpufreq: Remove LATENCY_MULTIPLIER") > Signed-off-by: Shawn Guo <shawnguo@kernel.org> > --- > Changes for v2: > - Follow Rafael's suggestion to define a default transition latency for > handling CPUFREQ_ETERNAL, and pave the way to get rid of > CPUFREQ_ETERNAL completely later. > > v1: https://lkml.org/lkml/2025/9/10/294 > > drivers/cpufreq/cpufreq.c | 3 +++ > include/linux/cpufreq.h | 2 ++ > 2 files changed, 5 insertions(+) > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c > index fc7eace8b65b..c69d10f0e8ec 100644 > --- a/drivers/cpufreq/cpufreq.c > +++ b/drivers/cpufreq/cpufreq.c > @@ -549,6 +549,9 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy) > if (policy->transition_delay_us) > return policy->transition_delay_us; > > + if (policy->cpuinfo.transition_latency == CPUFREQ_ETERNAL) > + policy->cpuinfo.transition_latency = CPUFREQ_DEFAULT_TANSITION_LATENCY_NS; For the fallback case, May we print a dbg info in dmesg to inform developers that the device tree is missing the clock-latency-ns property? (Rafael can help comment~) > + > latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC; > if (latency) > /* Give a 50% breathing room between updates */ > diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h > index 95f3807c8c55..935e9a660039 100644 > --- a/include/linux/cpufreq.h > +++ b/include/linux/cpufreq.h > @@ -36,6 +36,8 @@ > /* Print length for names. Extra 1 space for accommodating '\n' in prints */ > #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1) > > +#define CPUFREQ_DEFAULT_TANSITION_LATENCY_NS NSEC_PER_MSEC > + TANSITION --> TRANSITION ? > struct cpufreq_governor; > > enum cpufreq_table_sorting { -- Thx and BRs, Zhongqiu Han
On Monday, September 22, 2025 2:59:21 PM CEST Shawn Guo wrote:
> From: Shawn Guo <shawnguo@kernel.org>
>
> A regression is seen with 6.6 -> 6.12 kernel upgrade on platforms where
> cpufreq-dt driver sets cpuinfo.transition_latency as CPUFREQ_ETERNAL (-1),
> due to that platform's DT doesn't provide the optional property
> 'clock-latency-ns'. The dbs sampling_rate was 10000 us on 6.6 and
> suddently becomes 6442450 us (4294967295 / 1000 * 1.5) on 6.12 for these
> platforms, because the default transition delay was dropped by the commits
> below.
>
> commit 37c6dccd6837 ("cpufreq: Remove LATENCY_MULTIPLIER")
> commit a755d0e2d41b ("cpufreq: Honour transition_latency over transition_delay_us")
> commit e13aa799c2a6 ("cpufreq: Change default transition delay to 2ms")
>
> It slows down dbs governor's reacting to CPU loading change
> dramatically. Also, as transition_delay_us is used by schedutil governor
> as rate_limit_us, it shows a negative impact on device idle power
> consumption, because the device gets slightly less time in the lowest OPP.
>
> Fix the regressions by defining a default transition latency for
> handling the case of CPUFREQ_ETERNAL.
>
> Cc: stable@vger.kernel.org
> Fixes: 37c6dccd6837 ("cpufreq: Remove LATENCY_MULTIPLIER")
> Signed-off-by: Shawn Guo <shawnguo@kernel.org>
> ---
> Changes for v2:
> - Follow Rafael's suggestion to define a default transition latency for
> handling CPUFREQ_ETERNAL, and pave the way to get rid of
> CPUFREQ_ETERNAL completely later.
>
> v1: https://lkml.org/lkml/2025/9/10/294
>
> drivers/cpufreq/cpufreq.c | 3 +++
> include/linux/cpufreq.h | 2 ++
> 2 files changed, 5 insertions(+)
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index fc7eace8b65b..c69d10f0e8ec 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -549,6 +549,9 @@ unsigned int cpufreq_policy_transition_delay_us(struct cpufreq_policy *policy)
> if (policy->transition_delay_us)
> return policy->transition_delay_us;
>
> + if (policy->cpuinfo.transition_latency == CPUFREQ_ETERNAL)
> + policy->cpuinfo.transition_latency = CPUFREQ_DEFAULT_TANSITION_LATENCY_NS;
> +
> latency = policy->cpuinfo.transition_latency / NSEC_PER_USEC;
> if (latency)
> /* Give a 50% breathing room between updates */
> diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
> index 95f3807c8c55..935e9a660039 100644
> --- a/include/linux/cpufreq.h
> +++ b/include/linux/cpufreq.h
> @@ -36,6 +36,8 @@
> /* Print length for names. Extra 1 space for accommodating '\n' in prints */
> #define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1)
>
> +#define CPUFREQ_DEFAULT_TANSITION_LATENCY_NS NSEC_PER_MSEC
> +
> struct cpufreq_governor;
>
> enum cpufreq_table_sorting {
>
What about the appended (untested) change instead?
With a follow-up one to replace CPUFREQ_ETERNAL with something internal
to CPPC.
---
Documentation/admin-guide/pm/cpufreq.rst | 4 ----
Documentation/cpu-freq/cpu-drivers.rst | 3 +--
Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst | 3 +--
Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst | 3 +--
drivers/cpufreq/cppc_cpufreq.c | 14 ++++++++++++--
drivers/cpufreq/cpufreq-dt.c | 2 +-
drivers/cpufreq/imx6q-cpufreq.c | 2 +-
drivers/cpufreq/mediatek-cpufreq-hw.c | 2 +-
drivers/cpufreq/scmi-cpufreq.c | 2 +-
drivers/cpufreq/scpi-cpufreq.c | 2 +-
drivers/cpufreq/spear-cpufreq.c | 2 +-
include/linux/cpufreq.h | 7 ++++---
12 files changed, 25 insertions(+), 21 deletions(-)
--- a/Documentation/admin-guide/pm/cpufreq.rst
+++ b/Documentation/admin-guide/pm/cpufreq.rst
@@ -274,10 +274,6 @@ are the following:
The time it takes to switch the CPUs belonging to this policy from one
P-state to another, in nanoseconds.
- If unknown or if known to be so high that the scaling driver does not
- work with the `ondemand`_ governor, -1 (:c:macro:`CPUFREQ_ETERNAL`)
- will be returned by reads from this attribute.
-
``related_cpus``
List of all (online and offline) CPUs belonging to this policy.
--- a/Documentation/cpu-freq/cpu-drivers.rst
+++ b/Documentation/cpu-freq/cpu-drivers.rst
@@ -109,8 +109,7 @@ Then, the driver must fill in the follow
+-----------------------------------+--------------------------------------+
|policy->cpuinfo.transition_latency | the time it takes on this CPU to |
| | switch between two frequencies in |
-| | nanoseconds (if appropriate, else |
-| | specify CPUFREQ_ETERNAL) |
+| | nanoseconds |
+-----------------------------------+--------------------------------------+
|policy->cur | The current operating frequency of |
| | this CPU (if appropriate) |
--- a/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst
+++ b/Documentation/translations/zh_CN/cpu-freq/cpu-drivers.rst
@@ -112,8 +112,7 @@ CPUfreq核心层注册一个cpufreq_driv
| | |
+-----------------------------------+--------------------------------------+
|policy->cpuinfo.transition_latency | CPU在两个频率之间切换所需的时间,以 |
-| | 纳秒为单位(如不适用,设定为 |
-| | CPUFREQ_ETERNAL) |
+| | 纳秒为单位 |
| | |
+-----------------------------------+--------------------------------------+
|policy->cur | 该CPU当前的工作频率(如适用) |
--- a/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst
+++ b/Documentation/translations/zh_TW/cpu-freq/cpu-drivers.rst
@@ -112,8 +112,7 @@ CPUfreq核心層註冊一個cpufreq_driv
| | |
+-----------------------------------+--------------------------------------+
|policy->cpuinfo.transition_latency | CPU在兩個頻率之間切換所需的時間,以 |
-| | 納秒爲單位(如不適用,設定爲 |
-| | CPUFREQ_ETERNAL) |
+| | 納秒爲單位 |
| | |
+-----------------------------------+--------------------------------------+
|policy->cur | 該CPU當前的工作頻率(如適用) |
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -308,6 +308,16 @@ static int cppc_verify_policy(struct cpu
return 0;
}
+static unsigned int get_transition_latency(unsigned int cpu)
+{
+ unsigned int transition_latency_ns = cppc_get_transition_latency(cpu);
+
+ if (transition_latency_ns == CPUFREQ_ETERNAL)
+ return CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS / NSEC_PER_USEC;
+
+ return transition_latency_ns / NSEC_PER_USEC;
+}
+
/*
* The PCC subspace describes the rate at which platform can accept commands
* on the shared PCC channel (including READs which do not count towards freq
@@ -330,12 +340,12 @@ static unsigned int cppc_cpufreq_get_tra
return 10000;
}
}
- return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+ return get_transition_latency(cpu);
}
#else
static unsigned int cppc_cpufreq_get_transition_delay_us(unsigned int cpu)
{
- return cppc_get_transition_latency(cpu) / NSEC_PER_USEC;
+ return get_transition_latency(cpu);
}
#endif
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -104,7 +104,7 @@ static int cpufreq_init(struct cpufreq_p
transition_latency = dev_pm_opp_get_max_transition_latency(cpu_dev);
if (!transition_latency)
- transition_latency = CPUFREQ_ETERNAL;
+ transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
cpumask_copy(policy->cpus, priv->cpus);
policy->driver_data = priv;
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -442,7 +442,7 @@ soc_opp_out:
}
if (of_property_read_u32(np, "clock-latency", &transition_latency))
- transition_latency = CPUFREQ_ETERNAL;
+ transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
/*
* Calculate the ramp time for max voltage change in the
--- a/drivers/cpufreq/mediatek-cpufreq-hw.c
+++ b/drivers/cpufreq/mediatek-cpufreq-hw.c
@@ -309,7 +309,7 @@ static int mtk_cpufreq_hw_cpu_init(struc
latency = readl_relaxed(data->reg_bases[REG_FREQ_LATENCY]) * 1000;
if (!latency)
- latency = CPUFREQ_ETERNAL;
+ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
policy->cpuinfo.transition_latency = latency;
policy->fast_switch_possible = true;
--- a/drivers/cpufreq/scmi-cpufreq.c
+++ b/drivers/cpufreq/scmi-cpufreq.c
@@ -294,7 +294,7 @@ static int scmi_cpufreq_init(struct cpuf
latency = perf_ops->transition_latency_get(ph, domain);
if (!latency)
- latency = CPUFREQ_ETERNAL;
+ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
policy->cpuinfo.transition_latency = latency;
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -157,7 +157,7 @@ static int scpi_cpufreq_init(struct cpuf
latency = scpi_ops->get_transition_latency(cpu_dev);
if (!latency)
- latency = CPUFREQ_ETERNAL;
+ latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
policy->cpuinfo.transition_latency = latency;
--- a/drivers/cpufreq/spear-cpufreq.c
+++ b/drivers/cpufreq/spear-cpufreq.c
@@ -182,7 +182,7 @@ static int spear_cpufreq_probe(struct pl
if (of_property_read_u32(np, "clock-latency",
&spear_cpufreq.transition_latency))
- spear_cpufreq.transition_latency = CPUFREQ_ETERNAL;
+ spear_cpufreq.transition_latency = CPUFREQ_DEFAULT_TRANSITION_LATENCY_NS;
cnt = of_property_count_u32_elems(np, "cpufreq_tbl");
if (cnt <= 0) {
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -26,12 +26,13 @@
*********************************************************************/
/*
* Frequency values here are CPU kHz
- *
- * Maximum transition latency is in nanoseconds - if it's unknown,
- * CPUFREQ_ETERNAL shall be used.
*/
+/* Represents unknown transition latency */
#define CPUFREQ_ETERNAL (-1)
+
+#define CPUFREQ_DEFAULT_TANSITION_LATENCY_NS NSEC_PER_MSEC
+
#define CPUFREQ_NAME_LEN 16
/* Print length for names. Extra 1 space for accommodating '\n' in prints */
#define CPUFREQ_NAME_PLEN (CPUFREQ_NAME_LEN + 1)
© 2016 - 2025 Red Hat, Inc.