When boot with maxcpu=1 restrict, and LPI(Low Power Idle States) is on,
only CPU0 will go online. The support AMU flag of CPU0 will be set but the
flags of other CPUs will not. This will cause AMU FIE set up fail for CPU0
when it shares a cpufreq policy with other CPU(s). After that, when other
CPUs are finally online and the support AMU flags of them are set, they'll
never have a chance to set up AMU FIE, even though they're eligible.
To solve this problem, the process of setting up AMU FIE needs to be
modified as follows:
1. Set up AMU FIE only for the online CPUs.
2. Try to set up AMU FIE each time a CPU goes online and do the
freq_counters_valid() check. If this check fails, clear scale freq source
of all the CPUs related to the same policy, in case they use different
source of the freq scale.
At the same time, this change also be applied to cpufreq when calling
arch_set_freq_scale.
Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com>
---
arch/arm64/kernel/topology.c | 54 ++++++++++++++++++++++++++++++++++--
drivers/cpufreq/cpufreq.c | 4 +--
2 files changed, 54 insertions(+), 4 deletions(-)
diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c
index 9317a618bb87..a9d9e9969cea 100644
--- a/arch/arm64/kernel/topology.c
+++ b/arch/arm64/kernel/topology.c
@@ -385,7 +385,7 @@ static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val,
struct cpufreq_policy *policy = data;
if (val == CPUFREQ_CREATE_POLICY)
- amu_fie_setup(policy->related_cpus);
+ amu_fie_setup(policy->cpus);
/*
* We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU
@@ -404,10 +404,60 @@ static struct notifier_block init_amu_fie_notifier = {
.notifier_call = init_amu_fie_callback,
};
+static int cpuhp_topology_online(unsigned int cpu)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_policy(cpu);
+
+ /*
+ * If the online CPUs are not all AMU FIE CPUs or the new one is already
+ * an AMU FIE one, no need to set it.
+ */
+ if (!policy || !cpumask_available(amu_fie_cpus) ||
+ !cpumask_subset(policy->cpus, amu_fie_cpus) ||
+ cpumask_test_cpu(cpu, amu_fie_cpus))
+ return 0;
+
+ /*
+ * If the new online CPU cannot pass this check, all the CPUs related to
+ * the same policy should be clear from amu_fie_cpus mask, otherwise they
+ * may use different source of the freq scale.
+ */
+ if (WARN_ON(!freq_counters_valid(cpu))) {
+ topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_ARCH,
+ policy->related_cpus);
+ cpumask_andnot(amu_fie_cpus, amu_fie_cpus, policy->related_cpus);
+ return 0;
+ }
+
+ cpumask_set_cpu(cpu, amu_fie_cpus);
+
+ topology_set_scale_freq_source(&amu_sfd, cpumask_of(cpu));
+
+ pr_debug("CPU[%u]: counter will be used for FIE.", cpu);
+
+ return 0;
+}
+
static int __init init_amu_fie(void)
{
- return cpufreq_register_notifier(&init_amu_fie_notifier,
+ int ret;
+
+ ret = cpufreq_register_notifier(&init_amu_fie_notifier,
CPUFREQ_POLICY_NOTIFIER);
+ if (ret)
+ return ret;
+
+ ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
+ "arm64/topology:online",
+ cpuhp_topology_online,
+ NULL);
+ if (ret < 0) {
+ cpufreq_unregister_notifier(&init_amu_fie_notifier,
+ CPUFREQ_POLICY_NOTIFIER);
+ return ret;
+ }
+
+ return 0;
}
core_initcall(init_amu_fie);
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 78ca68ea754d..d1890a2af1af 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -417,7 +417,7 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
cpufreq_notify_post_transition(policy, freqs, transition_failed);
- arch_set_freq_scale(policy->related_cpus,
+ arch_set_freq_scale(policy->cpus,
policy->cur,
arch_scale_freq_ref(policy->cpu));
@@ -2219,7 +2219,7 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy,
return 0;
policy->cur = freq;
- arch_set_freq_scale(policy->related_cpus, freq,
+ arch_set_freq_scale(policy->cpus, freq,
arch_scale_freq_ref(policy->cpu));
cpufreq_stats_record_transition(policy, freq);
--
2.33.0
Hi, On Tuesday 19 Aug 2025 at 15:29:31 (+0800), Lifeng Zheng wrote: > When boot with maxcpu=1 restrict, and LPI(Low Power Idle States) is on, > only CPU0 will go online. The support AMU flag of CPU0 will be set but the > flags of other CPUs will not. This will cause AMU FIE set up fail for CPU0 > when it shares a cpufreq policy with other CPU(s). After that, when other > CPUs are finally online and the support AMU flags of them are set, they'll > never have a chance to set up AMU FIE, even though they're eligible. > > To solve this problem, the process of setting up AMU FIE needs to be > modified as follows: > > 1. Set up AMU FIE only for the online CPUs. > > 2. Try to set up AMU FIE each time a CPU goes online and do the > freq_counters_valid() check. If this check fails, clear scale freq source > of all the CPUs related to the same policy, in case they use different > source of the freq scale. > > At the same time, this change also be applied to cpufreq when calling > arch_set_freq_scale. > > Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com> > --- > arch/arm64/kernel/topology.c | 54 ++++++++++++++++++++++++++++++++++-- > drivers/cpufreq/cpufreq.c | 4 +-- > 2 files changed, 54 insertions(+), 4 deletions(-) > [..] > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c > index 78ca68ea754d..d1890a2af1af 100644 > --- a/drivers/cpufreq/cpufreq.c > +++ b/drivers/cpufreq/cpufreq.c > @@ -417,7 +417,7 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, > > cpufreq_notify_post_transition(policy, freqs, transition_failed); > > - arch_set_freq_scale(policy->related_cpus, > + arch_set_freq_scale(policy->cpus, > policy->cur, > arch_scale_freq_ref(policy->cpu)); > > @@ -2219,7 +2219,7 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, > return 0; > > policy->cur = freq; > - arch_set_freq_scale(policy->related_cpus, freq, > + arch_set_freq_scale(policy->cpus, freq, > arch_scale_freq_ref(policy->cpu)); I think it might be good to keep these calls to arch_set_freq_scale() for all related CPUs and not only online ones. This can result in CPUs coming out of hotplug with a wrong scale factor, because while they were out, any frequency transitions of the policy only modified the scale factor of online CPUs. When they come out of hotplug, arch_set_freq_scale() will not be called for them until there's a new frequency transition. I understand that if this is not changed to only pass online CPUs, supports_scale_freq_counters() will now fail when called in topology_set_freq_scale() for scenarios when only some CPUs in a policy are online - e.g. the scenario in your commit message. But I think a simple change in supports_scale_freq_counters() that instead checks that at least one CPU in the policy supports AMU-based FIE, instead of all, is a better fix that does not break the cpufreq-based FIE. If at least one CPU is marked as supporting AMUs for FIE we know that the AMU setup path is in progress and we should bail out of topology_set_freq_scale()/arch_set_freq_scale(). Hope it helps, Ionela. > cpufreq_stats_record_transition(policy, freq); > > -- > 2.33.0 > >
Hi Ionela, On Mon, Sep 01, 2025 at 08:29:26AM +0100, Ionela Voinescu wrote: > Hi, > > On Tuesday 19 Aug 2025 at 15:29:31 (+0800), Lifeng Zheng wrote: > > When boot with maxcpu=1 restrict, and LPI(Low Power Idle States) is on, > > only CPU0 will go online. The support AMU flag of CPU0 will be set but the > > flags of other CPUs will not. This will cause AMU FIE set up fail for CPU0 > > when it shares a cpufreq policy with other CPU(s). After that, when other > > CPUs are finally online and the support AMU flags of them are set, they'll > > never have a chance to set up AMU FIE, even though they're eligible. > > > > To solve this problem, the process of setting up AMU FIE needs to be > > modified as follows: > > > > 1. Set up AMU FIE only for the online CPUs. > > > > 2. Try to set up AMU FIE each time a CPU goes online and do the > > freq_counters_valid() check. If this check fails, clear scale freq source > > of all the CPUs related to the same policy, in case they use different > > source of the freq scale. > > > > At the same time, this change also be applied to cpufreq when calling > > arch_set_freq_scale. > > > > Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com> > > --- > > arch/arm64/kernel/topology.c | 54 ++++++++++++++++++++++++++++++++++-- > > drivers/cpufreq/cpufreq.c | 4 +-- > > 2 files changed, 54 insertions(+), 4 deletions(-) > > > [..] > > > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c > > index 78ca68ea754d..d1890a2af1af 100644 > > --- a/drivers/cpufreq/cpufreq.c > > +++ b/drivers/cpufreq/cpufreq.c > > @@ -417,7 +417,7 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, > > > > cpufreq_notify_post_transition(policy, freqs, transition_failed); > > > > - arch_set_freq_scale(policy->related_cpus, > > + arch_set_freq_scale(policy->cpus, > > policy->cur, > > arch_scale_freq_ref(policy->cpu)); > > > > @@ -2219,7 +2219,7 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, > > return 0; > > > > policy->cur = freq; > > - arch_set_freq_scale(policy->related_cpus, freq, > > + arch_set_freq_scale(policy->cpus, freq, > > arch_scale_freq_ref(policy->cpu)); > > I think it might be good to keep these calls to arch_set_freq_scale() for > all related CPUs and not only online ones. This can result in CPUs coming > out of hotplug with a wrong scale factor, because while they were out, any > frequency transitions of the policy only modified the scale factor of > online CPUs. When they come out of hotplug, arch_set_freq_scale() will not > be called for them until there's a new frequency transition. > > I understand that if this is not changed to only pass online CPUs, > supports_scale_freq_counters() will now fail when called in > topology_set_freq_scale() for scenarios when only some CPUs in a policy > are online - e.g. the scenario in your commit message. But I think a > simple change in supports_scale_freq_counters() that instead checks that > at least one CPU in the policy supports AMU-based FIE, instead of all, > is a better fix that does not break the cpufreq-based FIE. If at least > one CPU is marked as supporting AMUs for FIE we know that the AMU setup > path is in progress and we should bail out of > topology_set_freq_scale()/arch_set_freq_scale(). > Thank you for pointing that out - that indeed might be an issue, and the solution you have suggested should do the trick. --- BR Beata > Hope it helps, > Ionela. > > > cpufreq_stats_record_transition(policy, freq); > > > > -- > > 2.33.0 > > > >
Hi Lifeng, Some minor suggestions in addition to Beata's comments on the readability of those checks. On 19/08/2025 15:29, Lifeng Zheng wrote: > When boot with maxcpu=1 restrict, and LPI(Low Power Idle States) is on, > only CPU0 will go online. The support AMU flag of CPU0 will be set but the > flags of other CPUs will not. This will cause AMU FIE set up fail for CPU0 > when it shares a cpufreq policy with other CPU(s). After that, when other > CPUs are finally online and the support AMU flags of them are set, they'll > never have a chance to set up AMU FIE, even though they're eligible. > > To solve this problem, the process of setting up AMU FIE needs to be > modified as follows: > > 1. Set up AMU FIE only for the online CPUs. > > 2. Try to set up AMU FIE each time a CPU goes online and do the > freq_counters_valid() check. If this check fails, clear scale freq source > of all the CPUs related to the same policy, in case they use different > source of the freq scale. > > At the same time, this change also be applied to cpufreq when calling > arch_set_freq_scale. > > Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com> > --- > arch/arm64/kernel/topology.c | 54 ++++++++++++++++++++++++++++++++++-- > drivers/cpufreq/cpufreq.c | 4 +-- > 2 files changed, 54 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c > index 9317a618bb87..a9d9e9969cea 100644 > --- a/arch/arm64/kernel/topology.c > +++ b/arch/arm64/kernel/topology.c > @@ -385,7 +385,7 @@ static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val, > struct cpufreq_policy *policy = data; > > if (val == CPUFREQ_CREATE_POLICY) > - amu_fie_setup(policy->related_cpus); > + amu_fie_setup(policy->cpus); > > /* > * We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU > @@ -404,10 +404,60 @@ static struct notifier_block init_amu_fie_notifier = { > .notifier_call = init_amu_fie_callback, > }; > > +static int cpuhp_topology_online(unsigned int cpu) > +{ > + struct cpufreq_policy *policy = cpufreq_cpu_policy(cpu); > + > + /* > + * If the online CPUs are not all AMU FIE CPUs or the new one is already > + * an AMU FIE one, no need to set it. > + */ > + if (!policy || !cpumask_available(amu_fie_cpus) || > + !cpumask_subset(policy->cpus, amu_fie_cpus) || > + cpumask_test_cpu(cpu, amu_fie_cpus)) > + return 0; > + > + /* > + * If the new online CPU cannot pass this check, all the CPUs related to > + * the same policy should be clear from amu_fie_cpus mask, otherwise they > + * may use different source of the freq scale. > + */ > + if (WARN_ON(!freq_counters_valid(cpu))) { I think a panic warning might be too much for this? pr_info/pr_warn, or even pr_debug, should be enough. > + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_ARCH, > + policy->related_cpus); > + cpumask_andnot(amu_fie_cpus, amu_fie_cpus, policy->related_cpus); > + return 0; > + } > + > + cpumask_set_cpu(cpu, amu_fie_cpus); > + > + topology_set_scale_freq_source(&amu_sfd, cpumask_of(cpu)); > + > + pr_debug("CPU[%u]: counter will be used for FIE.", cpu); > + > + return 0; > +} > + > static int __init init_amu_fie(void) > { > - return cpufreq_register_notifier(&init_amu_fie_notifier, > + int ret; > + > + ret = cpufreq_register_notifier(&init_amu_fie_notifier, > CPUFREQ_POLICY_NOTIFIER); > + if (ret) > + return ret; > + > + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, > + "arm64/topology:online", > + cpuhp_topology_online, > + NULL); > + if (ret < 0) { > + cpufreq_unregister_notifier(&init_amu_fie_notifier, > + CPUFREQ_POLICY_NOTIFIER); > + return ret; > + } > + > + return 0; > } > core_initcall(init_amu_fie); > It's better move the following change into a separate patch before the AMU FIE changes. I don't think they are interdependent, and they can be applied separately. > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c > index 78ca68ea754d..d1890a2af1af 100644 > --- a/drivers/cpufreq/cpufreq.c > +++ b/drivers/cpufreq/cpufreq.c > @@ -417,7 +417,7 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, > > cpufreq_notify_post_transition(policy, freqs, transition_failed); > > - arch_set_freq_scale(policy->related_cpus, > + arch_set_freq_scale(policy->cpus, > policy->cur, > arch_scale_freq_ref(policy->cpu)); > > @@ -2219,7 +2219,7 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, > return 0; > > policy->cur = freq; > - arch_set_freq_scale(policy->related_cpus, freq, > + arch_set_freq_scale(policy->cpus, freq, > arch_scale_freq_ref(policy->cpu)); > cpufreq_stats_record_transition(policy, freq); >
The commit title does read a bit wrong I think. It does not really reflect the change for supporting cpu hotplug. Maybe smth along the lines of: arm64: topology: Handle AMU FIE setup on CPU hotplug ? On Tue, Aug 19, 2025 at 03:29:31PM +0800, Lifeng Zheng wrote: > When boot with maxcpu=1 restrict, and LPI(Low Power Idle States) is on, So actually it is `maxcpus` to start with, Not sure what LPI has to do with any of that ? It might be better to slightly reword the whole commit message. The problem is with CPUs being offline at the time the cpufreq policy is being created so it might be with maxcpus != nr_cpus , or because cpu bring-up failed, or due to RAS event that caused the cpu to go offline, etc. How about: "When a cpufreq policy is created, AMU FIE setup currently verifies all CPUs in the policy, regardless of whether they are online. If any of those CPUs are offline, their AMU capability flag is not yet verified, and the check fails. As a result, AMU FIE is not enabled even if the CPUs that are online do support it. Later, when the offline CPUs eventually come online and advertise AMU support, they have no opportunity to re-enable AMU FIE for the policy, leaving the whole frequency domain without AMU FIE despite being eligible. Restrict the initial AMU FIE check to the CPUs that are online at the time the policy is created, and allow CPUs brought online later to join the policy with AMU FIE enabled." > only CPU0 will go online. The support AMU flag of CPU0 will be set but the > flags of other CPUs will not. This will cause AMU FIE set up fail for CPU0 > when it shares a cpufreq policy with other CPU(s). After that, when other > CPUs are finally online and the support AMU flags of them are set, they'll > never have a chance to set up AMU FIE, even though they're eligible. > > To solve this problem, the process of setting up AMU FIE needs to be > modified as follows: > > 1. Set up AMU FIE only for the online CPUs. > > 2. Try to set up AMU FIE each time a CPU goes online and do the > freq_counters_valid() check. If this check fails, clear scale freq source > of all the CPUs related to the same policy, in case they use different > source of the freq scale. > > At the same time, this change also be applied to cpufreq when calling > arch_set_freq_scale. Could we clarify that a bit ? Reads a bit ambiguous. > > Signed-off-by: Lifeng Zheng <zhenglifeng1@huawei.com> > --- > arch/arm64/kernel/topology.c | 54 ++++++++++++++++++++++++++++++++++-- > drivers/cpufreq/cpufreq.c | 4 +-- > 2 files changed, 54 insertions(+), 4 deletions(-) > > diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c > index 9317a618bb87..a9d9e9969cea 100644 > --- a/arch/arm64/kernel/topology.c > +++ b/arch/arm64/kernel/topology.c > @@ -385,7 +385,7 @@ static int init_amu_fie_callback(struct notifier_block *nb, unsigned long val, > struct cpufreq_policy *policy = data; > > if (val == CPUFREQ_CREATE_POLICY) > - amu_fie_setup(policy->related_cpus); > + amu_fie_setup(policy->cpus); > > /* > * We don't need to handle CPUFREQ_REMOVE_POLICY event as the AMU > @@ -404,10 +404,60 @@ static struct notifier_block init_amu_fie_notifier = { > .notifier_call = init_amu_fie_callback, > }; > > +static int cpuhp_topology_online(unsigned int cpu) > +{ > + struct cpufreq_policy *policy = cpufreq_cpu_policy(cpu); > + > + /* > + * If the online CPUs are not all AMU FIE CPUs or the new one is already > + * an AMU FIE one, no need to set it. > + */ > + if (!policy || !cpumask_available(amu_fie_cpus) || > + !cpumask_subset(policy->cpus, amu_fie_cpus) || > + cpumask_test_cpu(cpu, amu_fie_cpus)) > + return 0; I believe this can be slightly optimised and made more ... readable, i.e: diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index a9d9e9969cea..2d6ce34af8e4 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -408,15 +408,25 @@ static int cpuhp_topology_online(unsigned int cpu) { struct cpufreq_policy *policy = cpufreq_cpu_policy(cpu); + /* Those are cheap checks */ + /* - * If the online CPUs are not all AMU FIE CPUs or the new one is already - * an AMU FIE one, no need to set it. + * Skip this CPU if: + * - it has no cpufreq policy assigned yet, + * - no policy exists that spans CPUs with AMU counters, or + * - it was already handled. */ - if (!policy || !cpumask_available(amu_fie_cpus) || - !cpumask_subset(policy->cpus, amu_fie_cpus) || + if (unlikely(!policy || !cpumask_available(amu_fie_cpus)) || cpumask_test_cpu(cpu, amu_fie_cpus)) return 0; + /* + * Only proceed if all already-online CPUs in this policy + * support AMU counters. + */ + if (unlikely(!cpumask_subset(policy->cpus, amu_fie_cpus))) + return 0; + /* * If the new online CPU cannot pass this check, all the CPUs related to * the same policy should be clear from amu_fie_cpus mask, otherwise they --- BR Beata > + > + /* > + * If the new online CPU cannot pass this check, all the CPUs related to > + * the same policy should be clear from amu_fie_cpus mask, otherwise they > + * may use different source of the freq scale. > + */ > + if (WARN_ON(!freq_counters_valid(cpu))) { > + topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_ARCH, > + policy->related_cpus); > + cpumask_andnot(amu_fie_cpus, amu_fie_cpus, policy->related_cpus); > + return 0; > + } > + > + cpumask_set_cpu(cpu, amu_fie_cpus); > + > + topology_set_scale_freq_source(&amu_sfd, cpumask_of(cpu)); > + > + pr_debug("CPU[%u]: counter will be used for FIE.", cpu); > + > + return 0; > +} > + > static int __init init_amu_fie(void) > { > - return cpufreq_register_notifier(&init_amu_fie_notifier, > + int ret; > + > + ret = cpufreq_register_notifier(&init_amu_fie_notifier, > CPUFREQ_POLICY_NOTIFIER); > + if (ret) > + return ret; > + > + ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, > + "arm64/topology:online", > + cpuhp_topology_online, > + NULL); > + if (ret < 0) { > + cpufreq_unregister_notifier(&init_amu_fie_notifier, > + CPUFREQ_POLICY_NOTIFIER); > + return ret; > + } > + > + return 0; > } > core_initcall(init_amu_fie); > > diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c > index 78ca68ea754d..d1890a2af1af 100644 > --- a/drivers/cpufreq/cpufreq.c > +++ b/drivers/cpufreq/cpufreq.c > @@ -417,7 +417,7 @@ void cpufreq_freq_transition_end(struct cpufreq_policy *policy, > > cpufreq_notify_post_transition(policy, freqs, transition_failed); > > - arch_set_freq_scale(policy->related_cpus, > + arch_set_freq_scale(policy->cpus, > policy->cur, > arch_scale_freq_ref(policy->cpu)); > > @@ -2219,7 +2219,7 @@ unsigned int cpufreq_driver_fast_switch(struct cpufreq_policy *policy, > return 0; > > policy->cur = freq; > - arch_set_freq_scale(policy->related_cpus, freq, > + arch_set_freq_scale(policy->cpus, freq, > arch_scale_freq_ref(policy->cpu)); > cpufreq_stats_record_transition(policy, freq); > > -- > 2.33.0 >
© 2016 - 2025 Red Hat, Inc.