cpufreq_set_policy() will ultimately override the policy min/max
values written in the .init() callback through:
cpufreq_policy_online()
\-cpufreq_init_policy()
\-cpufreq_set_policy()
\-/* Set policy->min/max */
Thus the policy min/max values provided are only temporary.
There is an exception if CPUFREQ_NEED_INITIAL_FREQ_CHECK is set and:
cpufreq_policy_online()
\-cpufreq_init_policy()
\-__cpufreq_driver_target()
\-cpufreq_driver->target()
is called. In this case, some drivers use the policy min/max
values in their .target() callback before they are overridden.
This should only concern the sh-cpufreq driver, so policy->min
and max values are replaced with their cpuinfo equivalent.
In this patch:
- Setting policy->min or max value in driver .init() cb is
interpreted as setting a QoS constraint.
- All policy->min and max initialization is removed as the value
is not used.
- For the cppc-cpufreq driver, the lowest non-linear freq. is
used as a min QoS constraint as suggested at:
https://lore.kernel.org/lkml/20260213100633.15413-1-zhangpengjie2@huawei.com/
Signed-off-by: Pierre Gondois <pierre.gondois@arm.com>
---
drivers/cpufreq/amd-pstate.c | 24 ++++++++++++------------
drivers/cpufreq/cppc_cpufreq.c | 11 +++++++----
drivers/cpufreq/cpufreq-nforce2.c | 4 ++--
drivers/cpufreq/cpufreq.c | 15 +++++++++++++--
drivers/cpufreq/freq_table.c | 7 +++----
drivers/cpufreq/gx-suspmod.c | 9 ++++-----
drivers/cpufreq/intel_pstate.c | 3 ---
drivers/cpufreq/pcc-cpufreq.c | 8 ++++----
drivers/cpufreq/pxa3xx-cpufreq.c | 4 ++--
drivers/cpufreq/sh-cpufreq.c | 7 ++++---
drivers/cpufreq/virtual-cpufreq.c | 5 +----
11 files changed, 52 insertions(+), 45 deletions(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 310d5938cbdf6..aaafbe9b26cae 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1003,12 +1003,12 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
perf = READ_ONCE(cpudata->perf);
- policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
- cpudata->nominal_freq,
- perf.lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
- cpudata->nominal_freq,
- perf.highest_perf);
+ policy->cpuinfo.min_freq = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.lowest_perf);
+ policy->cpuinfo.max_freq = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.highest_perf);
ret = amd_pstate_cppc_enable(policy);
if (ret)
@@ -1485,12 +1485,12 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
perf = READ_ONCE(cpudata->perf);
- policy->cpuinfo.min_freq = policy->min = perf_to_freq(perf,
- cpudata->nominal_freq,
- perf.lowest_perf);
- policy->cpuinfo.max_freq = policy->max = perf_to_freq(perf,
- cpudata->nominal_freq,
- perf.highest_perf);
+ policy->cpuinfo.min_freq = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.lowest_perf);
+ policy->cpuinfo.max_freq = perf_to_freq(perf,
+ cpudata->nominal_freq,
+ perf.highest_perf);
policy->driver_data = cpudata;
ret = amd_pstate_cppc_enable(policy);
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 4c46c7ea318eb..2bbf36516a0f0 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -586,6 +586,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
unsigned int cpu = policy->cpu;
struct cppc_cpudata *cpu_data;
struct cppc_perf_caps *caps;
+ unsigned int min, max;
int ret;
cpu_data = cppc_cpufreq_get_cpu_data(cpu);
@@ -596,13 +597,15 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
caps = &cpu_data->perf_caps;
policy->driver_data = cpu_data;
+ min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf);
+ max = cppc_perf_to_khz(caps, policy->boost_enabled ?
+ caps->highest_perf : caps->nominal_perf);
+
/*
* Set min to lowest nonlinear perf to avoid any efficiency penalty (see
* Section 8.4.7.1.1.5 of ACPI 6.1 spec)
*/
- policy->min = cppc_perf_to_khz(caps, caps->lowest_nonlinear_perf);
- policy->max = cppc_perf_to_khz(caps, policy->boost_enabled ?
- caps->highest_perf : caps->nominal_perf);
+ policy->min = min;
/*
* Set cpuinfo.min_freq to Lowest to make the full range of performance
@@ -610,7 +613,7 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
* nonlinear perf
*/
policy->cpuinfo.min_freq = cppc_perf_to_khz(caps, caps->lowest_perf);
- policy->cpuinfo.max_freq = policy->max;
+ policy->cpuinfo.max_freq = max;
policy->transition_delay_us = cppc_cpufreq_get_transition_delay_us(cpu);
policy->shared_type = cpu_data->shared_type;
diff --git a/drivers/cpufreq/cpufreq-nforce2.c b/drivers/cpufreq/cpufreq-nforce2.c
index fbbbe501cf2dc..831102522ad64 100644
--- a/drivers/cpufreq/cpufreq-nforce2.c
+++ b/drivers/cpufreq/cpufreq-nforce2.c
@@ -355,8 +355,8 @@ static int nforce2_cpu_init(struct cpufreq_policy *policy)
min_fsb = NFORCE2_MIN_FSB;
/* cpuinfo and default policy values */
- policy->min = policy->cpuinfo.min_freq = min_fsb * fid * 100;
- policy->max = policy->cpuinfo.max_freq = max_fsb * fid * 100;
+ policy->cpuinfo.min_freq = min_fsb * fid * 100;
+ policy->cpuinfo.max_freq = max_fsb * fid * 100;
return 0;
}
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index e4f24754df164..3059200766b0d 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1451,8 +1451,19 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
cpumask_and(policy->cpus, policy->cpus, cpu_online_mask);
if (new_policy) {
+ unsigned int min, max;
unsigned int req_nr;
+ /*
+ * If the driver has set policy->min or max,
+ * use the value as a QoS request.
+ */
+ min = max(FREQ_QOS_MIN_DEFAULT_VALUE, policy->min);
+ if (policy->max)
+ max = min(FREQ_QOS_MAX_DEFAULT_VALUE, policy->max);
+ else
+ max = FREQ_QOS_MAX_DEFAULT_VALUE;
+
for_each_cpu(j, policy->related_cpus) {
per_cpu(cpufreq_cpu_data, j) = policy;
add_cpu_dev_symlink(policy, j, get_cpu_device(j));
@@ -1468,7 +1479,7 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
ret = freq_qos_add_request(&policy->constraints,
policy->min_freq_req, FREQ_QOS_MIN,
- FREQ_QOS_MIN_DEFAULT_VALUE);
+ min);
if (ret < 0) {
/*
* So we don't call freq_qos_remove_request() for an
@@ -1488,7 +1499,7 @@ static int cpufreq_policy_online(struct cpufreq_policy *policy,
ret = freq_qos_add_request(&policy->constraints,
policy->max_freq_req, FREQ_QOS_MAX,
- FREQ_QOS_MAX_DEFAULT_VALUE);
+ max);
if (ret < 0) {
policy->max_freq_req = NULL;
goto out_destroy_policy;
diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c
index 7f251daf03ce3..9b37f37c36389 100644
--- a/drivers/cpufreq/freq_table.c
+++ b/drivers/cpufreq/freq_table.c
@@ -49,16 +49,15 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy)
max_freq = freq;
}
- policy->min = policy->cpuinfo.min_freq = min_freq;
- policy->max = max_freq;
+ policy->cpuinfo.min_freq = min_freq;
/*
* If the driver has set its own cpuinfo.max_freq above max_freq, leave
* it as is.
*/
if (policy->cpuinfo.max_freq < max_freq)
- policy->max = policy->cpuinfo.max_freq = max_freq;
+ policy->cpuinfo.max_freq = max_freq;
- if (policy->min == ~0)
+ if (min_freq == ~0)
return -EINVAL;
else
return 0;
diff --git a/drivers/cpufreq/gx-suspmod.c b/drivers/cpufreq/gx-suspmod.c
index 75b3ef7ec6796..57999b8d51fa2 100644
--- a/drivers/cpufreq/gx-suspmod.c
+++ b/drivers/cpufreq/gx-suspmod.c
@@ -397,7 +397,7 @@ static int cpufreq_gx_target(struct cpufreq_policy *policy,
static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
{
- unsigned int maxfreq;
+ unsigned int minfreq, maxfreq;
if (!policy || policy->cpu != 0)
return -ENODEV;
@@ -418,11 +418,10 @@ static int cpufreq_gx_cpu_init(struct cpufreq_policy *policy)
policy->cpu = 0;
if (max_duration < POLICY_MIN_DIV)
- policy->min = maxfreq / max_duration;
+ minfreq = maxfreq / max_duration;
else
- policy->min = maxfreq / POLICY_MIN_DIV;
- policy->max = maxfreq;
- policy->cpuinfo.min_freq = maxfreq / max_duration;
+ minfreq = maxfreq / POLICY_MIN_DIV;
+ policy->cpuinfo.min_freq = minfreq;
policy->cpuinfo.max_freq = maxfreq;
return 0;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index ec4abe3745736..bf2f7524d04a9 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3047,9 +3047,6 @@ static int __intel_pstate_cpu_init(struct cpufreq_policy *policy)
policy->cpuinfo.max_freq = READ_ONCE(global.no_turbo) ?
cpu->pstate.max_freq : cpu->pstate.turbo_freq;
- policy->min = policy->cpuinfo.min_freq;
- policy->max = policy->cpuinfo.max_freq;
-
intel_pstate_init_acpi_perf_limits(policy);
policy->fast_switch_possible = true;
diff --git a/drivers/cpufreq/pcc-cpufreq.c b/drivers/cpufreq/pcc-cpufreq.c
index ac2e90a65f0c4..231edfe8cabaa 100644
--- a/drivers/cpufreq/pcc-cpufreq.c
+++ b/drivers/cpufreq/pcc-cpufreq.c
@@ -551,13 +551,13 @@ static int pcc_cpufreq_cpu_init(struct cpufreq_policy *policy)
goto out;
}
- policy->max = policy->cpuinfo.max_freq =
+ policy->cpuinfo.max_freq =
ioread32(&pcch_hdr->nominal) * 1000;
- policy->min = policy->cpuinfo.min_freq =
+ policy->cpuinfo.min_freq =
ioread32(&pcch_hdr->minimum_frequency) * 1000;
- pr_debug("init: policy->max is %d, policy->min is %d\n",
- policy->max, policy->min);
+ pr_debug("init: max_freq is %d, min_freq is %d\n",
+ policy->cpuinfo.max_freq, policy->cpuinfo.min_freq);
out:
return result;
}
diff --git a/drivers/cpufreq/pxa3xx-cpufreq.c b/drivers/cpufreq/pxa3xx-cpufreq.c
index 4afa48d172dbe..f53b9d7edc76a 100644
--- a/drivers/cpufreq/pxa3xx-cpufreq.c
+++ b/drivers/cpufreq/pxa3xx-cpufreq.c
@@ -185,8 +185,8 @@ static int pxa3xx_cpufreq_init(struct cpufreq_policy *policy)
int ret = -EINVAL;
/* set default policy and cpuinfo */
- policy->min = policy->cpuinfo.min_freq = 104000;
- policy->max = policy->cpuinfo.max_freq =
+ policy->cpuinfo.min_freq = 104000;
+ policy->cpuinfo.max_freq =
(cpu_is_pxa320()) ? 806000 : 624000;
policy->cpuinfo.transition_latency = 1000; /* FIXME: 1 ms, assumed */
diff --git a/drivers/cpufreq/sh-cpufreq.c b/drivers/cpufreq/sh-cpufreq.c
index 642ddb9ea217e..5cf0f482924d5 100644
--- a/drivers/cpufreq/sh-cpufreq.c
+++ b/drivers/cpufreq/sh-cpufreq.c
@@ -57,7 +57,8 @@ static long __sh_cpufreq_target(void *arg)
/* Convert target_freq from kHz to Hz */
freq = clk_round_rate(cpuclk, target->freq * 1000);
- if (freq < (policy->min * 1000) || freq > (policy->max * 1000))
+ if (freq < (policy->cpuinfo.min_freq * 1000) ||
+ freq > (policy->cpuinfo.max_freq * 1000))
return -EINVAL;
dev_dbg(dev, "requested frequency %u Hz\n", target->freq * 1000);
@@ -124,9 +125,9 @@ static int sh_cpufreq_cpu_init(struct cpufreq_policy *policy)
dev_notice(dev, "no frequency table found, falling back "
"to rate rounding.\n");
- policy->min = policy->cpuinfo.min_freq =
+ policy->cpuinfo.min_freq =
(clk_round_rate(cpuclk, 1) + 500) / 1000;
- policy->max = policy->cpuinfo.max_freq =
+ policy->cpuinfo.max_freq =
(clk_round_rate(cpuclk, ~0UL) + 500) / 1000;
}
diff --git a/drivers/cpufreq/virtual-cpufreq.c b/drivers/cpufreq/virtual-cpufreq.c
index 6ffa16d239b2b..60707bf6ce1e3 100644
--- a/drivers/cpufreq/virtual-cpufreq.c
+++ b/drivers/cpufreq/virtual-cpufreq.c
@@ -164,10 +164,7 @@ static int virt_cpufreq_get_freq_info(struct cpufreq_policy *policy)
policy->cpuinfo.min_freq = 1;
policy->cpuinfo.max_freq = virt_cpufreq_get_perftbl_entry(policy->cpu, 0);
- policy->min = policy->cpuinfo.min_freq;
- policy->max = policy->cpuinfo.max_freq;
-
- policy->cur = policy->max;
+ policy->cur = policy->cpuinfo.max_freq;
return 0;
}
--
2.43.0
On 17-03-26, 11:17, Pierre Gondois wrote: > cpufreq_set_policy() will ultimately override the policy min/max > values written in the .init() callback through: > cpufreq_policy_online() > \-cpufreq_init_policy() > \-cpufreq_set_policy() > \-/* Set policy->min/max */ > Thus the policy min/max values provided are only temporary. I am not comfortable with this patch to be honest. policy->min/max are used at so many places that it is really difficult to make sure if this patch will break something or not. For example: cpufreq_set_policy() cpufreq_driver->verify() cpufreq_frequency_table_verify() This uses min/max before it is set by the path you mentioned. I would suggest dropping this change, or most of it and doing only what is really required for this series. -- viresh
On 3/20/26 11:14, Viresh Kumar wrote:
> On 17-03-26, 11:17, Pierre Gondois wrote:
>> cpufreq_set_policy() will ultimately override the policy min/max
>> values written in the .init() callback through:
>> cpufreq_policy_online()
>> \-cpufreq_init_policy()
>> \-cpufreq_set_policy()
>> \-/* Set policy->min/max */
>> Thus the policy min/max values provided are only temporary.
> I am not comfortable with this patch to be honest. policy->min/max are used at
> so many places that it is really difficult to make sure if this patch will break
> something or not.
>
> For example:
>
> cpufreq_set_policy()
> cpufreq_driver->verify()
> cpufreq_frequency_table_verify()
>
> This uses min/max before it is set by the path you mentioned.
>
> I would suggest dropping this change, or most of it and doing only what is
> really required for this series.
>
Being able to set the min/max_freq_req from the driver might be
something that is needed, cf:
https://lore.kernel.org/lkml/20260213100633.15413-1-zhangpengjie2@huawei.com/
It would also allow to have a common way to set policy->min/max values
as they are set to the the cpuinfo.min/max_freq.
On the other hand I agree that I didn't test all the possible paths
for this change, so this is a bit audacious.
What about adding the following to have the values set for all drivers:
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 70814c567243b..3a1e5f58a301f 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -1530,6 +1530,9 @@ static int cpufreq_policy_online(struct
cpufreq_policy *policy,
CPUFREQ_CREATE_POLICY, policy);
}
+ policy->max = policy->cpuinfo.max_freq;
+ policy->min = policy->cpuinfo.min_freq;
+
if (cpufreq_driver->get && has_target()) {
policy->cur = cpufreq_driver->get(policy->cpu);
if (!policy->cur) {
On 24-03-26, 12:40, Pierre Gondois wrote:
> Being able to set the min/max_freq_req from the driver might be
> something that is needed, cf:
> https://lore.kernel.org/lkml/20260213100633.15413-1-zhangpengjie2@huawei.com/
>
> It would also allow to have a common way to set policy->min/max values
> as they are set to the the cpuinfo.min/max_freq.
>
> On the other hand I agree that I didn't test all the possible paths
> for this change, so this is a bit audacious.
>
>
> What about adding the following to have the values set for all drivers:
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>
> index 70814c567243b..3a1e5f58a301f 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -1530,6 +1530,9 @@ static int cpufreq_policy_online(struct cpufreq_policy
> *policy,
> CPUFREQ_CREATE_POLICY, policy);
> }
>
> + policy->max = policy->cpuinfo.max_freq;
> + policy->min = policy->cpuinfo.min_freq;
> +
> if (cpufreq_driver->get && has_target()) {
> policy->cur = cpufreq_driver->get(policy->cpu);
> if (!policy->cur) {
I am okay with a separate series doing all these cleanups. We can set some sort
of rule on what is expected from the driver here and what is left from the core.
For example driver only sets cpuinfo.max/min and core takes care of rest. This
series would cleanup all the drivers, etc.
--
viresh
On 3/25/26 07:49, Viresh Kumar wrote:
> On 24-03-26, 12:40, Pierre Gondois wrote:
>> Being able to set the min/max_freq_req from the driver might be
>> something that is needed, cf:
>> https://lore.kernel.org/lkml/20260213100633.15413-1-zhangpengjie2@huawei.com/
>>
>> It would also allow to have a common way to set policy->min/max values
>> as they are set to the the cpuinfo.min/max_freq.
>>
>> On the other hand I agree that I didn't test all the possible paths
>> for this change, so this is a bit audacious.
>>
>>
>> What about adding the following to have the values set for all drivers:
>>
>> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
>>
>> index 70814c567243b..3a1e5f58a301f 100644
>> --- a/drivers/cpufreq/cpufreq.c
>> +++ b/drivers/cpufreq/cpufreq.c
>> @@ -1530,6 +1530,9 @@ static int cpufreq_policy_online(struct cpufreq_policy
>> *policy,
>> CPUFREQ_CREATE_POLICY, policy);
>> }
>>
>> + policy->max = policy->cpuinfo.max_freq;
>> + policy->min = policy->cpuinfo.min_freq;
>> +
>> if (cpufreq_driver->get && has_target()) {
>> policy->cur = cpufreq_driver->get(policy->cpu);
>> if (!policy->cur) {
> I am okay with a separate series doing all these cleanups. We can set some sort
> of rule on what is expected from the driver here and what is left from the core.
> For example driver only sets cpuinfo.max/min and core takes care of rest. This
> series would cleanup all the drivers, etc.
>
Ok,
Is the current state of this patch + the above modification acceptable ?
On 25-03-26, 17:54, Pierre Gondois wrote: > Is the current state of this patch + the above modification acceptable ? I will review it closely again once you send it.. There were too many small changes and I got confused earlier :) -- viresh
© 2016 - 2026 Red Hat, Inc.