drivers/cpufreq/acpi-cpufreq.c | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-)
The boost-related code in cpufreq has undergone several changes over the
years, but this particular piece remained unchanged and is now outdated.
The cpufreq core currently manages boost settings during initialization,
and only when necessary. As such, there's no longer a need to enable
boost explicitly when entering system suspend.
Previously, this wasn’t causing issues because boost settings were
force-updated during policy initialization. However, commit 2b16c631832d
("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()") changed
that behavior—correctly—by avoiding unnecessary updates.
As a result of this change, if boost was disabled prior to suspend, it
remains disabled on resume—as expected. But due to the current code
forcibly enabling boost at suspend time, the system ends up with boost
frequencies enabled after resume, even if the global boost flag was
disabled. This contradicts the intended behavior.
Fix this by not enabling boost on policy exit.
Fixes: 2b16c631832d ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()")
Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220013
Reported-by: Nicholas Chin <nic.c3.14@gmail.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
drivers/cpufreq/acpi-cpufreq.c | 23 +++--------------------
1 file changed, 3 insertions(+), 20 deletions(-)
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 924314cdeebc..85b5a88f723f 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -89,8 +89,9 @@ static bool boost_state(unsigned int cpu)
return false;
}
-static int boost_set_msr(bool enable)
+static void boost_set_msr_each(void *p_en)
{
+ bool enable = (bool) p_en;
u32 msr_addr;
u64 msr_mask, val;
@@ -107,7 +108,7 @@ static int boost_set_msr(bool enable)
msr_mask = MSR_K7_HWCR_CPB_DIS;
break;
default:
- return -EINVAL;
+ return;
}
rdmsrl(msr_addr, val);
@@ -118,14 +119,6 @@ static int boost_set_msr(bool enable)
val |= msr_mask;
wrmsrl(msr_addr, val);
- return 0;
-}
-
-static void boost_set_msr_each(void *p_en)
-{
- bool enable = (bool) p_en;
-
- boost_set_msr(enable);
}
static int set_boost(struct cpufreq_policy *policy, int val)
@@ -532,15 +525,6 @@ static void free_acpi_perf_data(void)
free_percpu(acpi_perf_data);
}
-static int cpufreq_boost_down_prep(unsigned int cpu)
-{
- /*
- * Clear the boost-disable bit on the CPU_DOWN path so that
- * this cpu cannot block the remaining ones from boosting.
- */
- return boost_set_msr(1);
-}
-
/*
* acpi_cpufreq_early_init - initialize ACPI P-States library
*
@@ -931,7 +915,6 @@ static void acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
pr_debug("%s\n", __func__);
- cpufreq_boost_down_prep(policy->cpu);
policy->fast_switch_possible = false;
policy->driver_data = NULL;
acpi_processor_unregister_performance(data->acpi_perf_cpu);
--
2.31.1.272.g89b43f80a514
> The boost-related code in cpufreq has undergone several changes over the
> years, but this particular piece remained unchanged and is now outdated.
>
> The cpufreq core currently manages boost settings during initialization,
> and only when necessary. As such, there's no longer a need to enable
> boost explicitly when entering system suspend.
>
> Previously, this wasn’t causing issues because boost settings were
> force-updated during policy initialization. However, commit 2b16c631832d
> ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()") changed
> that behavior—correctly—by avoiding unnecessary updates.
>
> As a result of this change, if boost was disabled prior to suspend, it
> remains disabled on resume—as expected. But due to the current code
> forcibly enabling boost at suspend time, the system ends up with boost
> frequencies enabled after resume, even if the global boost flag was
> disabled. This contradicts the intended behavior.
>
> Fix this by not enabling boost on policy exit.
>
> Fixes: 2b16c631832d ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()")
> Closes:https://bugzilla.kernel.org/show_bug.cgi?id=220013
> Reported-by: Nicholas Chin<nic.c3.14@gmail.com>
> Signed-off-by: Viresh Kumar<viresh.kumar@linaro.org>
> ---
> drivers/cpufreq/acpi-cpufreq.c | 23 +++--------------------
> 1 file changed, 3 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 924314cdeebc..85b5a88f723f 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -89,8 +89,9 @@ static bool boost_state(unsigned int cpu)
> return false;
> }
>
> -static int boost_set_msr(bool enable)
> +static void boost_set_msr_each(void *p_en)
> {
> + bool enable = (bool) p_en;
> u32 msr_addr;
> u64 msr_mask, val;
>
> @@ -107,7 +108,7 @@ static int boost_set_msr(bool enable)
> msr_mask = MSR_K7_HWCR_CPB_DIS;
> break;
> default:
> - return -EINVAL;
> + return;
> }
>
> rdmsrl(msr_addr, val);
> @@ -118,14 +119,6 @@ static int boost_set_msr(bool enable)
> val |= msr_mask;
>
> wrmsrl(msr_addr, val);
> - return 0;
> -}
> -
> -static void boost_set_msr_each(void *p_en)
> -{
> - bool enable = (bool) p_en;
> -
> - boost_set_msr(enable);
> }
>
> static int set_boost(struct cpufreq_policy *policy, int val)
> @@ -532,15 +525,6 @@ static void free_acpi_perf_data(void)
> free_percpu(acpi_perf_data);
> }
>
> -static int cpufreq_boost_down_prep(unsigned int cpu)
> -{
> - /*
> - * Clear the boost-disable bit on the CPU_DOWN path so that
> - * this cpu cannot block the remaining ones from boosting.
> - */
> - return boost_set_msr(1);
> -}
> -
> /*
> * acpi_cpufreq_early_init - initialize ACPI P-States library
> *
> @@ -931,7 +915,6 @@ static void acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
>
> pr_debug("%s\n", __func__);
>
> - cpufreq_boost_down_prep(policy->cpu);
> policy->fast_switch_possible = false;
> policy->driver_data = NULL;
> acpi_processor_unregister_performance(data->acpi_perf_cpu);
Unfortunately the issue I reported still seems to be present after applying this patch. Upon resuming from suspend, the system is still entering boost states descpite the boost flag being set to 0.
On 16-04-25, 19:54, Nicholas Chin wrote: > Unfortunately the issue I reported still seems to be present after > applying this patch. Upon resuming from suspend, the system is still > entering boost states descpite the boost flag being set to 0. Okay, so this is what we know so far: - Force synchronizing (disabling here) boost state at resume was making this work earlier. - Setting the boost flag to "enabled" state during resume works as well, as that makes the cpufreq core disable the boost frequencies again. - This patch (though doing the correct thing) doesn't work. This is one of the known places where boost was getting enabled before going to suspend though. - This means that some other part of kernel (or hardware ?) is enabling the boost frequencies at suspend (or early resume), which the kernel was disabling earlier and not anymore. Rafael, any suggestions ? -- viresh
On Thu, Apr 17, 2025 at 7:02 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > On 16-04-25, 19:54, Nicholas Chin wrote: > > Unfortunately the issue I reported still seems to be present after > > applying this patch. Upon resuming from suspend, the system is still > > entering boost states descpite the boost flag being set to 0. > > Okay, so this is what we know so far: > > - Force synchronizing (disabling here) boost state at resume was > making this work earlier. > > - Setting the boost flag to "enabled" state during resume works as > well, as that makes the cpufreq core disable the boost frequencies > again. > > - This patch (though doing the correct thing) doesn't work. This is > one of the known places where boost was getting enabled before going > to suspend though. > > - This means that some other part of kernel (or hardware ?) is > enabling the boost frequencies at suspend (or early resume), which > the kernel was disabling earlier and not anymore. > > Rafael, any suggestions ? This a resume from S3, so the platform firmware may enable the boost in its resume path.
Copying more information from Bugzilla here (Nicholas, it would be faster if you can put all your observations here first, more people are looking at emails than bugzilla). > Nicholas Chin wrote: > I did some more testing and debugging and it seems like when > cpufreq_online() runs after waking the system, policy->boost_enabled > and cpufreq_boost_enabled() are both 0, so the set_boost() at the end > of that function is never run. Right, this is what I wanted to do with the $Subject patch. Don't update boost anymore in suspend/resume > cpufreq_boost_enabled() being 0 indicates that the MSR has boosting > disabled, but when I read out that MSR using rdmsr the bit seems to > indicate that it is actually enabled (I am aware of the inverted logic > of that bit). set_boost() seems to be the only place in the kernel > that causes that MSR to be modified, and I didn't see any extra calls > to it in my debug logs, so it seems like something else (outside the > kernel?) is setting that MSR. And this is what I feel too, something else in kernel or outside of it is doing something tricky. -- viresh
On Thu, Apr 17, 2025 at 7:09 AM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > Copying more information from Bugzilla here (Nicholas, it would be > faster if you can put all your observations here first, more people > are looking at emails than bugzilla). > > > Nicholas Chin wrote: > > I did some more testing and debugging and it seems like when > > cpufreq_online() runs after waking the system, policy->boost_enabled > > and cpufreq_boost_enabled() are both 0, so the set_boost() at the end > > of that function is never run. > > Right, this is what I wanted to do with the $Subject patch. Don't > update boost anymore in suspend/resume This is going to work for suspend-to-idle, but not necessarily for S3. BTW, the patch is correct IMV, so I'm not going to drop it, but it looks like something more is needed on top of it. > > cpufreq_boost_enabled() being 0 indicates that the MSR has boosting > > disabled, but when I read out that MSR using rdmsr the bit seems to > > indicate that it is actually enabled (I am aware of the inverted logic > > of that bit). set_boost() seems to be the only place in the kernel > > that causes that MSR to be modified, and I didn't see any extra calls > > to it in my debug logs, so it seems like something else (outside the > > kernel?) is setting that MSR. > > And this is what I feel too, something else in kernel or outside of it > is doing something tricky. On a resume from S3, you actually don't know if the platform firmware has preserved the configuration from before the suspend transition. It may not.
On Thu, Apr 17, 2025 at 2:39 PM Rafael J. Wysocki <rafael@kernel.org> wrote:
>
> On Thu, Apr 17, 2025 at 7:09 AM Viresh Kumar <viresh.kumar@linaro.org> wrote:
> >
> > Copying more information from Bugzilla here (Nicholas, it would be
> > faster if you can put all your observations here first, more people
> > are looking at emails than bugzilla).
> >
> > > Nicholas Chin wrote:
> > > I did some more testing and debugging and it seems like when
> > > cpufreq_online() runs after waking the system, policy->boost_enabled
> > > and cpufreq_boost_enabled() are both 0, so the set_boost() at the end
> > > of that function is never run.
> >
> > Right, this is what I wanted to do with the $Subject patch. Don't
> > update boost anymore in suspend/resume
>
> This is going to work for suspend-to-idle, but not necessarily for S3.
>
> BTW, the patch is correct IMV, so I'm not going to drop it, but it
> looks like something more is needed on top of it.
But the changelog isn't because the patch really doesn't address the
issue at hand, which is most likely related to the platform firmware
clearing the "boost disable" bit.
Frankly, I'd rather get back to the state from before commit
2b16c631832d ("cpufreq: ACPI: Remove set_boost in
acpi_cpufreq_cpu_init()") and start over with the knowledge that the
platform firmware may scribble on the MSR before the kernel gets
control back.
On a way back from system suspend the MSR needs to be put back in sync
with the boost settings in the kernel.
On Thu, 17 Apr 2025 at 21:23, Rafael J. Wysocki <rafael@kernel.org> wrote:
> But the changelog isn't because the patch really doesn't address the
> issue at hand, which is most likely related to the platform firmware
> clearing the "boost disable" bit.
I think the patch and changelog were still correct as the driver was also
enabling the boost at exit(). So it fixes the problem, but not fully.
> Frankly, I'd rather get back to the state from before commit
> 2b16c631832d ("cpufreq: ACPI: Remove set_boost in
> acpi_cpufreq_cpu_init()") and start over with the knowledge that the
> platform firmware may scribble on the MSR before the kernel gets
> control back.
>
> On a way back from system suspend the MSR needs to be put back in sync
> with the boost settings in the kernel.
What about something like this instead ? Nicholas, can you give this a try
along with the $Subject patch (both patches should be applied) ?
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 924314cdeebc..71557f2ac22a 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -909,8 +909,10 @@ static int acpi_cpufreq_cpu_init(struct
cpufreq_policy *policy)
if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
pr_warn(FW_WARN "P-state 0 is not max freq\n");
- if (acpi_cpufreq_driver.set_boost)
+ if (acpi_cpufreq_driver.set_boost) {
policy->boost_supported = true;
+ policy->boost_enabled = boost_state(cpu);
+ }
return result;
On 2025-04-17 23:58, Viresh Kumar wrote:
> What about something like this instead ? Nicholas, can you give this a try
> along with the $Subject patch (both patches should be applied) ?
>
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 924314cdeebc..71557f2ac22a 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -909,8 +909,10 @@ static int acpi_cpufreq_cpu_init(struct
> cpufreq_policy *policy)
> if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
> pr_warn(FW_WARN "P-state 0 is not max freq\n");
>
> - if (acpi_cpufreq_driver.set_boost)
> + if (acpi_cpufreq_driver.set_boost) {
> policy->boost_supported = true;
> + policy->boost_enabled = boost_state(cpu);
> + }
>
> return result;
Thanks, applying this patch along with the $Subject patch works.
On Fri, Apr 18, 2025 at 7:06 PM Nicholas Chin <nic.c3.14@gmail.com> wrote:
>
> On 2025-04-17 23:58, Viresh Kumar wrote:
> > What about something like this instead ? Nicholas, can you give this a try
> > along with the $Subject patch (both patches should be applied) ?
> >
> > diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> > index 924314cdeebc..71557f2ac22a 100644
> > --- a/drivers/cpufreq/acpi-cpufreq.c
> > +++ b/drivers/cpufreq/acpi-cpufreq.c
> > @@ -909,8 +909,10 @@ static int acpi_cpufreq_cpu_init(struct
> > cpufreq_policy *policy)
> > if (perf->states[0].core_frequency * 1000 != freq_table[0].frequency)
> > pr_warn(FW_WARN "P-state 0 is not max freq\n");
> >
> > - if (acpi_cpufreq_driver.set_boost)
> > + if (acpi_cpufreq_driver.set_boost) {
> > policy->boost_supported = true;
> > + policy->boost_enabled = boost_state(cpu);
So it updates policy->boost_enabled in accordance with the current
setting in the MSR.
IMO it would be better to update the MSR in accordance with
policy->boost_enabled or users may get confused if their boost
settings change after a suspend-resume cycle. Or have I got lost
completely?
> > + }
> >
> > return result;
>
> Thanks, applying this patch along with the $Subject patch works.
On Sat, 19 Apr 2025 at 00:58, Rafael J. Wysocki <rafael@kernel.org> wrote:
> So it updates policy->boost_enabled in accordance with the current
> setting in the MSR.
Yes.
> IMO it would be better to update the MSR in accordance with
> policy->boost_enabled or users may get confused if their boost
> settings change after a suspend-resume cycle. Or have I got lost
> completely?
I wrote this patch based on the sync that happens at the end of
cpufreq_online():
/* Let the per-policy boost flag mirror the cpufreq_driver
boost during init */
if (cpufreq_driver->set_boost && policy->boost_supported &&
policy->boost_enabled != cpufreq_boost_enabled()) {
policy->boost_enabled = cpufreq_boost_enabled();
ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
if (ret) {
/* If the set_boost fails, the online
operation is not affected */
pr_info("%s: CPU%d: Cannot %s BOOST\n",
__func__, policy->cpu,
str_enable_disable(policy->boost_enabled));
policy->boost_enabled = !policy->boost_enabled;
}
}
So my patch works as the cpufreq core force-syncs the state at init
(pretty much what the driver was doing before).
Though I now wonder if this code (in cpufreq_online()) is really doing
the right thing or not. So if global boost is enabled before suspend with
policy boost being disabled, the policy boost will be enabled on resume.
--
Viresh
On 2025/4/19 15:54, Viresh Kumar wrote:
> On Sat, 19 Apr 2025 at 00:58, Rafael J. Wysocki <rafael@kernel.org> wrote:
>
>> So it updates policy->boost_enabled in accordance with the current
>> setting in the MSR.
>
> Yes.
>
>> IMO it would be better to update the MSR in accordance with
>> policy->boost_enabled or users may get confused if their boost
>> settings change after a suspend-resume cycle. Or have I got lost
>> completely?
>
> I wrote this patch based on the sync that happens at the end of
> cpufreq_online():
>
> /* Let the per-policy boost flag mirror the cpufreq_driver
> boost during init */
> if (cpufreq_driver->set_boost && policy->boost_supported &&
> policy->boost_enabled != cpufreq_boost_enabled()) {
> policy->boost_enabled = cpufreq_boost_enabled();
> ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
> if (ret) {
> /* If the set_boost fails, the online
> operation is not affected */
> pr_info("%s: CPU%d: Cannot %s BOOST\n",
> __func__, policy->cpu,
> str_enable_disable(policy->boost_enabled));
> policy->boost_enabled = !policy->boost_enabled;
> }
> }
>
> So my patch works as the cpufreq core force-syncs the state at init
> (pretty much what the driver was doing before).
>
> Though I now wonder if this code (in cpufreq_online()) is really doing
> the right thing or not. So if global boost is enabled before suspend with
> policy boost being disabled, the policy boost will be enabled on resume.
Yes, the policy boost will be forcibly set to mirror the global boost. This
indicates that the global boost value is the default value of policy boost
each time the CPU goes online. Otherwise, we'll meet things like:
1. The global boost is set to disabled after a CPU going offline but the
policy boost is still be enabled after the CPU going online again.
2. The global boost is set to enabled after a CPU going offline and the
rest of the online CPUs are all boost enabled. However, the offline CPU
remains in the boost disabled state after it going online again. Users
have to set its boost state separately.
IMV, a user set the global boost means "I want all policy boost/unboost",
every CPU going online after that should follow this order. So I think
the code in cpufreq_online() is doing the right thing.
BTW, I think there is optimization can be done in
cpufreq_boost_trigger_state(). Currently, Nothing will happend if users set
global boost flag to true when this flag is already true. But I think it's
better to set all policies to boost in this situation. It might make more
sense to think of this as a refresh operation. This is just my idea. I'd
like to hear your opinion.
>
> --
> Viresh
Coming back to this response again:
On 19-04-25, 17:35, zhenglifeng (A) wrote:
> Yes, the policy boost will be forcibly set to mirror the global boost. This
> indicates that the global boost value is the default value of policy boost
> each time the CPU goes online. Otherwise, we'll meet things like:
>
> 1. The global boost is set to disabled after a CPU going offline but the
> policy boost is still be enabled after the CPU going online again.
This is surely a valid case, we must not enable policy boost when
global boost is disabled.
> 2. The global boost is set to enabled after a CPU going offline and the
> rest of the online CPUs are all boost enabled. However, the offline CPU
> remains in the boost disabled state after it going online again. Users
> have to set its boost state separately.
I now this this is the right behavior. The policy wasn't present when
the global boost was enabled and so the action doesn't necessarily
apply to it.
This is how I think this should be fixed, we may still need to fix
acpi driver's bug separately though:
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 3841c9da6cac..7ac8b4c28658 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -620,6 +620,20 @@ static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf)
return sysfs_emit(buf, "%d\n", policy->boost_enabled);
}
+static int policy_set_boost(struct cpufreq_policy *policy, bool enable, bool forced)
+{
+ if (!forced && (policy->boost_enabled == enable))
+ return 0;
+
+ policy->boost_enabled = enable;
+
+ ret = cpufreq_driver->set_boost(policy, enable);
+ if (ret)
+ policy->boost_enabled = !policy->boost_enabled;
+
+ return ret;
+}
+
static ssize_t store_local_boost(struct cpufreq_policy *policy,
const char *buf, size_t count)
{
@@ -635,21 +649,14 @@ static ssize_t store_local_boost(struct cpufreq_policy *policy,
if (!policy->boost_supported)
return -EINVAL;
- if (policy->boost_enabled == enable)
- return count;
-
- policy->boost_enabled = enable;
-
cpus_read_lock();
- ret = cpufreq_driver->set_boost(policy, enable);
+ ret = policy_set_boost(policy, enable, false);
cpus_read_unlock();
- if (ret) {
- policy->boost_enabled = !policy->boost_enabled;
- return ret;
- }
+ if (!ret)
+ return count;
- return count;
+ return ret;
}
static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost);
@@ -1617,16 +1624,17 @@ static int cpufreq_online(unsigned int cpu)
if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
policy->cdev = of_cpufreq_cooling_register(policy);
- /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
+ /*
+ * Let the per-policy boost flag mirror the cpufreq_driver boost during
+ * initialization for a new policy. For an existing policy, maintain the
+ * previous boost value unless global boost is disabled now.
+ */
if (cpufreq_driver->set_boost && policy->boost_supported &&
- policy->boost_enabled != cpufreq_boost_enabled()) {
- policy->boost_enabled = cpufreq_boost_enabled();
- ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
+ (new_policy || !cpufreq_boost_enabled())) {
+ ret = policy_set_boost(policy, cpufreq_boost_enabled(), false);
if (ret) {
- /* If the set_boost fails, the online operation is not affected */
- pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu,
- str_enable_disable(policy->boost_enabled));
- policy->boost_enabled = !policy->boost_enabled;
+ pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__,
+ policy->cpu, str_enable_disable(cpufreq_boost_enabled()));
}
}
@@ -2864,12 +2872,9 @@ static int cpufreq_boost_trigger_state(int state)
if (!policy->boost_supported)
continue;
- policy->boost_enabled = state;
- ret = cpufreq_driver->set_boost(policy, state);
- if (ret) {
- policy->boost_enabled = !policy->boost_enabled;
+ ret = policy_set_boost(policy, state, true);
+ if (ret)
goto err_reset_state;
- }
}
cpus_read_unlock();
--
viresh
On 2025/4/21 19:37, Viresh Kumar wrote:
> Coming back to this response again:
>
> On 19-04-25, 17:35, zhenglifeng (A) wrote:
>> Yes, the policy boost will be forcibly set to mirror the global boost. This
>> indicates that the global boost value is the default value of policy boost
>> each time the CPU goes online. Otherwise, we'll meet things like:
>>
>> 1. The global boost is set to disabled after a CPU going offline but the
>> policy boost is still be enabled after the CPU going online again.
>
> This is surely a valid case, we must not enable policy boost when
> global boost is disabled.
>
>> 2. The global boost is set to enabled after a CPU going offline and the
>> rest of the online CPUs are all boost enabled. However, the offline CPU
>> remains in the boost disabled state after it going online again. Users
>> have to set its boost state separately.
>
> I now this this is the right behavior. The policy wasn't present when
> the global boost was enabled and so the action doesn't necessarily
> apply to it.
OK. I just think that in this case the users would generally want it to be
true. But if you think this is the right behavior, I'll accept it.
>
> This is how I think this should be fixed, we may still need to fix
> acpi driver's bug separately though:
>
> diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
> index 3841c9da6cac..7ac8b4c28658 100644
> --- a/drivers/cpufreq/cpufreq.c
> +++ b/drivers/cpufreq/cpufreq.c
> @@ -620,6 +620,20 @@ static ssize_t show_local_boost(struct cpufreq_policy *policy, char *buf)
> return sysfs_emit(buf, "%d\n", policy->boost_enabled);
> }
>
> +static int policy_set_boost(struct cpufreq_policy *policy, bool enable, bool forced)
> +{
> + if (!forced && (policy->boost_enabled == enable))
> + return 0;
> +
> + policy->boost_enabled = enable;
> +
> + ret = cpufreq_driver->set_boost(policy, enable);
> + if (ret)
> + policy->boost_enabled = !policy->boost_enabled;
This may cause boost_enabled becomes false but actually boosted when forced
is true and trying to set boost_enabled from true to true.
> +
> + return ret;
> +}
> +
> static ssize_t store_local_boost(struct cpufreq_policy *policy,
> const char *buf, size_t count)
> {
> @@ -635,21 +649,14 @@ static ssize_t store_local_boost(struct cpufreq_policy *policy,
> if (!policy->boost_supported)
> return -EINVAL;
>
> - if (policy->boost_enabled == enable)
> - return count;
> -
> - policy->boost_enabled = enable;
> -
> cpus_read_lock();
> - ret = cpufreq_driver->set_boost(policy, enable);
> + ret = policy_set_boost(policy, enable, false);
> cpus_read_unlock();
>
> - if (ret) {
> - policy->boost_enabled = !policy->boost_enabled;
> - return ret;
> - }
> + if (!ret)
> + return count;
>
> - return count;
> + return ret;
> }
>
> static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost);
> @@ -1617,16 +1624,17 @@ static int cpufreq_online(unsigned int cpu)
> if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
> policy->cdev = of_cpufreq_cooling_register(policy);
>
> - /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
> + /*
> + * Let the per-policy boost flag mirror the cpufreq_driver boost during
> + * initialization for a new policy. For an existing policy, maintain the
> + * previous boost value unless global boost is disabled now.
> + */
> if (cpufreq_driver->set_boost && policy->boost_supported &&
> - policy->boost_enabled != cpufreq_boost_enabled()) {
> - policy->boost_enabled = cpufreq_boost_enabled();
> - ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
> + (new_policy || !cpufreq_boost_enabled())) {
> + ret = policy_set_boost(policy, cpufreq_boost_enabled(), false);
I think forced here should be true. If new_policy and
!cpufreq_boost_enabled() but the cpu is actually boosted by some other
reason (like what we met in acpi-cpufreq), set_boost() should be forcibly
executed to make the cpu unboost.
> if (ret) {
> - /* If the set_boost fails, the online operation is not affected */
> - pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__, policy->cpu,
> - str_enable_disable(policy->boost_enabled));
> - policy->boost_enabled = !policy->boost_enabled;
> + pr_info("%s: CPU%d: Cannot %s BOOST\n", __func__,
> + policy->cpu, str_enable_disable(cpufreq_boost_enabled()));
> }
> }
>
> @@ -2864,12 +2872,9 @@ static int cpufreq_boost_trigger_state(int state)
> if (!policy->boost_supported)
> continue;
>
> - policy->boost_enabled = state;
> - ret = cpufreq_driver->set_boost(policy, state);
> - if (ret) {
> - policy->boost_enabled = !policy->boost_enabled;
> + ret = policy_set_boost(policy, state, true);
Sorry, I can't see why forced need to be true here but false in other
places. Actually, the optimization I mentioned earlier is like:
@@ -2870,16 +2870,13 @@ static int cpufreq_boost_trigger_state(int state)
unsigned long flags;
int ret = 0;
- if (cpufreq_driver->boost_enabled == state)
- return 0;
-
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver->boost_enabled = state;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
cpus_read_lock();
for_each_active_policy(policy) {
- if (!policy->boost_supported)
+ if (!policy->boost_supported || (policy->boost_enabled == state))
continue;
policy->boost_enabled = state;
> + if (ret)
> goto err_reset_state;
> - }
> }
> cpus_read_unlock();
>
On 21-04-25, 21:36, zhenglifeng (A) wrote:
> On 2025/4/21 19:37, Viresh Kumar wrote:
> > +static int policy_set_boost(struct cpufreq_policy *policy, bool enable, bool forced)
> > +{
> > + if (!forced && (policy->boost_enabled == enable))
> > + return 0;
> > +
> > + policy->boost_enabled = enable;
> > +
> > + ret = cpufreq_driver->set_boost(policy, enable);
> > + if (ret)
> > + policy->boost_enabled = !policy->boost_enabled;
>
> This may cause boost_enabled becomes false but actually boosted when forced
> is true and trying to set boost_enabled from true to true.
Can't do much in case of failure. And this is the current behavior
anyway. This was just some code cleanup, doesn't change the behavior
of the code.
> > static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost);
> > @@ -1617,16 +1624,17 @@ static int cpufreq_online(unsigned int cpu)
> > if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
> > policy->cdev = of_cpufreq_cooling_register(policy);
> >
> > - /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
> > + /*
> > + * Let the per-policy boost flag mirror the cpufreq_driver boost during
> > + * initialization for a new policy. For an existing policy, maintain the
> > + * previous boost value unless global boost is disabled now.
> > + */
> > if (cpufreq_driver->set_boost && policy->boost_supported &&
> > - policy->boost_enabled != cpufreq_boost_enabled()) {
> > - policy->boost_enabled = cpufreq_boost_enabled();
> > - ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
> > + (new_policy || !cpufreq_boost_enabled())) {
> > + ret = policy_set_boost(policy, cpufreq_boost_enabled(), false);
>
> I think forced here should be true. If new_policy and
> !cpufreq_boost_enabled() but the cpu is actually boosted by some other
> reason (like what we met in acpi-cpufreq), set_boost() should be forcibly
> executed to make the cpu unboost.
The problem is that setting boost may be time consuming for some
platforms and we may not want to do that unnecessarily. ACPI cpufreq
should be fixed separately for that.
I am sending a series now to fix them all, please review that.
--
viresh
On 2025/4/22 17:41, Viresh Kumar wrote:
> On 21-04-25, 21:36, zhenglifeng (A) wrote:
>> On 2025/4/21 19:37, Viresh Kumar wrote:
>>> +static int policy_set_boost(struct cpufreq_policy *policy, bool enable, bool forced)
>>> +{
>>> + if (!forced && (policy->boost_enabled == enable))
>>> + return 0;
>>> +
>>> + policy->boost_enabled = enable;
>>> +
>>> + ret = cpufreq_driver->set_boost(policy, enable);
>>> + if (ret)
>>> + policy->boost_enabled = !policy->boost_enabled;
>>
>> This may cause boost_enabled becomes false but actually boosted when forced
>> is true and trying to set boost_enabled from true to true.
>
> Can't do much in case of failure. And this is the current behavior
> anyway. This was just some code cleanup, doesn't change the behavior
> of the code.
If forced is true, this may change the behavior. But I see you gave up this
parameter in new version, so I think it's OK now.
>
>>> static struct freq_attr local_boost = __ATTR(boost, 0644, show_local_boost, store_local_boost);
>>> @@ -1617,16 +1624,17 @@ static int cpufreq_online(unsigned int cpu)
>>> if (new_policy && cpufreq_thermal_control_enabled(cpufreq_driver))
>>> policy->cdev = of_cpufreq_cooling_register(policy);
>>>
>>> - /* Let the per-policy boost flag mirror the cpufreq_driver boost during init */
>>> + /*
>>> + * Let the per-policy boost flag mirror the cpufreq_driver boost during
>>> + * initialization for a new policy. For an existing policy, maintain the
>>> + * previous boost value unless global boost is disabled now.
>>> + */
>>> if (cpufreq_driver->set_boost && policy->boost_supported &&
>>> - policy->boost_enabled != cpufreq_boost_enabled()) {
>>> - policy->boost_enabled = cpufreq_boost_enabled();
>>> - ret = cpufreq_driver->set_boost(policy, policy->boost_enabled);
>>> + (new_policy || !cpufreq_boost_enabled())) {
>>> + ret = policy_set_boost(policy, cpufreq_boost_enabled(), false);
>>
>> I think forced here should be true. If new_policy and
>> !cpufreq_boost_enabled() but the cpu is actually boosted by some other
>> reason (like what we met in acpi-cpufreq), set_boost() should be forcibly
>> executed to make the cpu unboost.
>
> The problem is that setting boost may be time consuming for some
> platforms and we may not want to do that unnecessarily. ACPI cpufreq
> should be fixed separately for that.
Makes sense.
>
> I am sending a series now to fix them all, please review that.
>
On 19-04-25, 17:35, zhenglifeng (A) wrote: > Yes, the policy boost will be forcibly set to mirror the global boost. This > indicates that the global boost value is the default value of policy boost > each time the CPU goes online. Otherwise, we'll meet things like: > > 1. The global boost is set to disabled after a CPU going offline but the > policy boost is still be enabled after the CPU going online again. > > 2. The global boost is set to enabled after a CPU going offline and the > rest of the online CPUs are all boost enabled. However, the offline CPU > remains in the boost disabled state after it going online again. Users > have to set its boost state separately. I agree that both of these are valid issues, but so is retaining state across suspend/resume too.. There is a difference in a user manually removing a CPU (offline) and suspend/resume. With a manual offline operation, the code in cpufreq_online() is doing the right thing, default to global boost. But the user configuration shouldn't change with just suspend resume. > IMV, a user set the global boost means "I want all policy boost/unboost", > every CPU going online after that should follow this order. So I think > the code in cpufreq_online() is doing the right thing. Yes, but any change to policy->boost after that must also be honored. > BTW, I think there is optimization can be done in > cpufreq_boost_trigger_state(). Currently, Nothing will happend if users set > global boost flag to true when this flag is already true. But I think it's > better to set all policies to boost in this situation. It might make more > sense to think of this as a refresh operation. This is just my idea. I'd > like to hear your opinion. Makes sense. -- viresh
On 2025/4/21 14:20, Viresh Kumar wrote: > On 19-04-25, 17:35, zhenglifeng (A) wrote: >> Yes, the policy boost will be forcibly set to mirror the global boost. This >> indicates that the global boost value is the default value of policy boost >> each time the CPU goes online. Otherwise, we'll meet things like: >> >> 1. The global boost is set to disabled after a CPU going offline but the >> policy boost is still be enabled after the CPU going online again. >> >> 2. The global boost is set to enabled after a CPU going offline and the >> rest of the online CPUs are all boost enabled. However, the offline CPU >> remains in the boost disabled state after it going online again. Users >> have to set its boost state separately. > > I agree that both of these are valid issues, but so is retaining state > across suspend/resume too.. There is a difference in a user manually > removing a CPU (offline) and suspend/resume. > > With a manual offline operation, the code in cpufreq_online() is doing > the right thing, default to global boost. But the user configuration > shouldn't change with just suspend resume. > >> IMV, a user set the global boost means "I want all policy boost/unboost", >> every CPU going online after that should follow this order. So I think >> the code in cpufreq_online() is doing the right thing. > > Yes, but any change to policy->boost after that must also be honored. I see. Then I think the key is how to distinguish CPU offline/online and suspend/resume in cpufreq_online(). > >> BTW, I think there is optimization can be done in >> cpufreq_boost_trigger_state(). Currently, Nothing will happend if users set >> global boost flag to true when this flag is already true. But I think it's >> better to set all policies to boost in this situation. It might make more >> sense to think of this as a refresh operation. This is just my idea. I'd >> like to hear your opinion. > > Makes sense. >
On 2025/4/16 13:29, Viresh Kumar wrote:
> The boost-related code in cpufreq has undergone several changes over the
> years, but this particular piece remained unchanged and is now outdated.
>
> The cpufreq core currently manages boost settings during initialization,
> and only when necessary. As such, there's no longer a need to enable
> boost explicitly when entering system suspend.
>
> Previously, this wasn’t causing issues because boost settings were
> force-updated during policy initialization. However, commit 2b16c631832d
> ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()") changed
> that behavior—correctly—by avoiding unnecessary updates.
>
> As a result of this change, if boost was disabled prior to suspend, it
> remains disabled on resume—as expected. But due to the current code
> forcibly enabling boost at suspend time, the system ends up with boost
> frequencies enabled after resume, even if the global boost flag was
> disabled. This contradicts the intended behavior.
>
> Fix this by not enabling boost on policy exit.
>
> Fixes: 2b16c631832d ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()")
> Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220013
> Reported-by: Nicholas Chin <nic.c3.14@gmail.com>
> Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> ---
> drivers/cpufreq/acpi-cpufreq.c | 23 +++--------------------
> 1 file changed, 3 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> index 924314cdeebc..85b5a88f723f 100644
> --- a/drivers/cpufreq/acpi-cpufreq.c
> +++ b/drivers/cpufreq/acpi-cpufreq.c
> @@ -89,8 +89,9 @@ static bool boost_state(unsigned int cpu)
> return false;
> }
>
> -static int boost_set_msr(bool enable)
> +static void boost_set_msr_each(void *p_en)
> {
> + bool enable = (bool) p_en;
> u32 msr_addr;
> u64 msr_mask, val;
>
> @@ -107,7 +108,7 @@ static int boost_set_msr(bool enable)
> msr_mask = MSR_K7_HWCR_CPB_DIS;
> break;
> default:
> - return -EINVAL;
> + return;
> }
>
> rdmsrl(msr_addr, val);
> @@ -118,14 +119,6 @@ static int boost_set_msr(bool enable)
> val |= msr_mask;
>
> wrmsrl(msr_addr, val);
> - return 0;
> -}
> -
> -static void boost_set_msr_each(void *p_en)
> -{
> - bool enable = (bool) p_en;
> -
> - boost_set_msr(enable);
> }
>
> static int set_boost(struct cpufreq_policy *policy, int val)
> @@ -532,15 +525,6 @@ static void free_acpi_perf_data(void)
> free_percpu(acpi_perf_data);
> }
>
> -static int cpufreq_boost_down_prep(unsigned int cpu)
> -{
> - /*
> - * Clear the boost-disable bit on the CPU_DOWN path so that
> - * this cpu cannot block the remaining ones from boosting.
> - */
> - return boost_set_msr(1);
> -}
> -
> /*
> * acpi_cpufreq_early_init - initialize ACPI P-States library
> *
> @@ -931,7 +915,6 @@ static void acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
>
> pr_debug("%s\n", __func__);
>
> - cpufreq_boost_down_prep(policy->cpu);
> policy->fast_switch_possible = false;
> policy->driver_data = NULL;
> acpi_processor_unregister_performance(data->acpi_perf_cpu);
Nice!
I wonder why this cpufreq_boost_down_prep() was needed at the beginning.
Reviewed-by: Lifeng Zheng <zhenglifeng1@huawei.com>
On Wed, Apr 16, 2025 at 10:47 AM zhenglifeng (A)
<zhenglifeng1@huawei.com> wrote:
>
> On 2025/4/16 13:29, Viresh Kumar wrote:
>
> > The boost-related code in cpufreq has undergone several changes over the
> > years, but this particular piece remained unchanged and is now outdated.
> >
> > The cpufreq core currently manages boost settings during initialization,
> > and only when necessary. As such, there's no longer a need to enable
> > boost explicitly when entering system suspend.
> >
> > Previously, this wasn’t causing issues because boost settings were
> > force-updated during policy initialization. However, commit 2b16c631832d
> > ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()") changed
> > that behavior—correctly—by avoiding unnecessary updates.
> >
> > As a result of this change, if boost was disabled prior to suspend, it
> > remains disabled on resume—as expected. But due to the current code
> > forcibly enabling boost at suspend time, the system ends up with boost
> > frequencies enabled after resume, even if the global boost flag was
> > disabled. This contradicts the intended behavior.
> >
> > Fix this by not enabling boost on policy exit.
> >
> > Fixes: 2b16c631832d ("cpufreq: ACPI: Remove set_boost in acpi_cpufreq_cpu_init()")
> > Closes: https://bugzilla.kernel.org/show_bug.cgi?id=220013
> > Reported-by: Nicholas Chin <nic.c3.14@gmail.com>
> > Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
> > ---
> > drivers/cpufreq/acpi-cpufreq.c | 23 +++--------------------
> > 1 file changed, 3 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
> > index 924314cdeebc..85b5a88f723f 100644
> > --- a/drivers/cpufreq/acpi-cpufreq.c
> > +++ b/drivers/cpufreq/acpi-cpufreq.c
> > @@ -89,8 +89,9 @@ static bool boost_state(unsigned int cpu)
> > return false;
> > }
> >
> > -static int boost_set_msr(bool enable)
> > +static void boost_set_msr_each(void *p_en)
> > {
> > + bool enable = (bool) p_en;
> > u32 msr_addr;
> > u64 msr_mask, val;
> >
> > @@ -107,7 +108,7 @@ static int boost_set_msr(bool enable)
> > msr_mask = MSR_K7_HWCR_CPB_DIS;
> > break;
> > default:
> > - return -EINVAL;
> > + return;
> > }
> >
> > rdmsrl(msr_addr, val);
> > @@ -118,14 +119,6 @@ static int boost_set_msr(bool enable)
> > val |= msr_mask;
> >
> > wrmsrl(msr_addr, val);
> > - return 0;
> > -}
> > -
> > -static void boost_set_msr_each(void *p_en)
> > -{
> > - bool enable = (bool) p_en;
> > -
> > - boost_set_msr(enable);
> > }
> >
> > static int set_boost(struct cpufreq_policy *policy, int val)
> > @@ -532,15 +525,6 @@ static void free_acpi_perf_data(void)
> > free_percpu(acpi_perf_data);
> > }
> >
> > -static int cpufreq_boost_down_prep(unsigned int cpu)
> > -{
> > - /*
> > - * Clear the boost-disable bit on the CPU_DOWN path so that
> > - * this cpu cannot block the remaining ones from boosting.
> > - */
> > - return boost_set_msr(1);
> > -}
> > -
> > /*
> > * acpi_cpufreq_early_init - initialize ACPI P-States library
> > *
> > @@ -931,7 +915,6 @@ static void acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy)
> >
> > pr_debug("%s\n", __func__);
> >
> > - cpufreq_boost_down_prep(policy->cpu);
> > policy->fast_switch_possible = false;
> > policy->driver_data = NULL;
> > acpi_processor_unregister_performance(data->acpi_perf_cpu);
>
> Nice!
>
> I wonder why this cpufreq_boost_down_prep() was needed at the beginning.
>
> Reviewed-by: Lifeng Zheng <zhenglifeng1@huawei.com>
Applied as 6.15-rc material, thanks!
© 2016 - 2025 Red Hat, Inc.