For cpus sharing one cpufreq domain, cpufreq_driver.init() is
only invoked on the firstcpu, so current per-CPU hwp driver data
struct hwp_drv_data{} actually fails to be allocated for cpus other than the
first one. There is no need to make it per-CPU.
We embed struct hwp_drv_data{} into struct cpufreq_policy{}, then cpus could
share the hwp driver data allocated for the firstcpu, like the way they share
struct cpufreq_policy{}. We also make it a union, with "hwp", and later
"amd-cppc" as a sub-struct.
Suggested-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Penny Zheng <Penny.Zheng@amd.com>
---
v8 -> v9:
- new commit
---
xen/arch/x86/acpi/cpufreq/hwp.c | 32 +++++++++++++-----------------
xen/include/acpi/cpufreq/cpufreq.h | 6 ++++++
2 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/xen/arch/x86/acpi/cpufreq/hwp.c b/xen/arch/x86/acpi/cpufreq/hwp.c
index 240491c96a..5c98f3eb3e 100644
--- a/xen/arch/x86/acpi/cpufreq/hwp.c
+++ b/xen/arch/x86/acpi/cpufreq/hwp.c
@@ -67,7 +67,6 @@ struct hwp_drv_data
uint8_t desired;
uint8_t energy_perf;
};
-static DEFINE_PER_CPU_READ_MOSTLY(struct hwp_drv_data *, hwp_drv_data);
#define hwp_err(cpu, fmt, args...) \
printk(XENLOG_ERR "HWP: CPU%u error: " fmt, cpu, ## args)
@@ -224,7 +223,7 @@ static bool __init hwp_available(void)
static int cf_check hwp_cpufreq_verify(struct cpufreq_policy *policy)
{
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
+ struct hwp_drv_data *data = policy->u.hwp;
if ( !feature_hwp_activity_window && data->activity_window )
{
@@ -239,7 +238,7 @@ static int cf_check hwp_cpufreq_verify(struct cpufreq_policy *policy)
static void cf_check hwp_write_request(void *info)
{
const struct cpufreq_policy *policy = info;
- struct hwp_drv_data *data = this_cpu(hwp_drv_data);
+ struct hwp_drv_data *data = policy->u.hwp;
union hwp_request hwp_req = data->curr_req;
data->ret = 0;
@@ -259,7 +258,7 @@ static int cf_check hwp_cpufreq_target(struct cpufreq_policy *policy,
unsigned int relation)
{
unsigned int cpu = policy->cpu;
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+ struct hwp_drv_data *data = policy->u.hwp;
/* Zero everything to ensure reserved bits are zero... */
union hwp_request hwp_req = { .raw = 0 };
@@ -350,7 +349,7 @@ static void hwp_get_cpu_speeds(struct cpufreq_policy *policy)
static void cf_check hwp_init_msrs(void *info)
{
struct cpufreq_policy *policy = info;
- struct hwp_drv_data *data = this_cpu(hwp_drv_data);
+ struct hwp_drv_data *data = policy->u.hwp;
uint64_t val;
/*
@@ -426,15 +425,14 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->governor = &cpufreq_gov_hwp;
- per_cpu(hwp_drv_data, cpu) = data;
+ policy->u.hwp = data;
on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1);
if ( data->curr_req.raw == -1 )
{
hwp_err(cpu, "Could not initialize HWP properly\n");
- per_cpu(hwp_drv_data, cpu) = NULL;
- xfree(data);
+ XFREE(policy->u.hwp);
return -ENODEV;
}
@@ -462,10 +460,8 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
static int cf_check hwp_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
-
- per_cpu(hwp_drv_data, policy->cpu) = NULL;
- xfree(data);
+ if ( policy->u.hwp )
+ XFREE(policy->u.hwp);
return 0;
}
@@ -480,7 +476,7 @@ static int cf_check hwp_cpufreq_cpu_exit(struct cpufreq_policy *policy)
static void cf_check hwp_set_misc_turbo(void *info)
{
const struct cpufreq_policy *policy = info;
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
+ struct hwp_drv_data *data = policy->u.hwp;
uint64_t msr;
data->ret = 0;
@@ -511,7 +507,7 @@ static int cf_check hwp_cpufreq_update(unsigned int cpu, struct cpufreq_policy *
{
on_selected_cpus(cpumask_of(cpu), hwp_set_misc_turbo, policy, 1);
- return per_cpu(hwp_drv_data, cpu)->ret;
+ return policy->u.hwp->ret;
}
#endif /* CONFIG_PM_OP */
@@ -531,9 +527,10 @@ hwp_cpufreq_driver = {
int get_hwp_para(unsigned int cpu,
struct xen_get_cppc_para *cppc_para)
{
- const struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+ const struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_policy, cpu);
+ const struct hwp_drv_data *data;
- if ( data == NULL )
+ if ( !policy || !(data = policy->u.hwp) )
return -ENODATA;
cppc_para->features =
@@ -554,8 +551,7 @@ int get_hwp_para(unsigned int cpu,
int set_hwp_para(struct cpufreq_policy *policy,
struct xen_set_cppc_para *set_cppc)
{
- unsigned int cpu = policy->cpu;
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+ struct hwp_drv_data *data = policy->u.hwp;
bool cleared_act_window = false;
if ( data == NULL )
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 5d4881eea8..c0ecd690c5 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -62,6 +62,7 @@ struct perf_limits {
uint32_t min_policy_pct;
};
+struct hwp_drv_data;
struct cpufreq_policy {
cpumask_var_t cpus; /* affected CPUs */
unsigned int shared_type; /* ANY or ALL affected CPUs
@@ -81,6 +82,11 @@ struct cpufreq_policy {
int8_t turbo; /* tristate flag: 0 for unsupported
* -1 for disable, 1 for enabled
* See CPUFREQ_TURBO_* below for defines */
+ union {
+#ifdef CONFIG_INTEL
+ struct hwp_drv_data *hwp; /* Driver data for Intel HWP */
+#endif
+ } u;
};
DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);
--
2.34.1
On 04.09.2025 08:35, Penny Zheng wrote:
> For cpus sharing one cpufreq domain, cpufreq_driver.init() is
> only invoked on the firstcpu, so current per-CPU hwp driver data
> struct hwp_drv_data{} actually fails to be allocated for cpus other than the
> first one. There is no need to make it per-CPU.
> We embed struct hwp_drv_data{} into struct cpufreq_policy{}, then cpus could
> share the hwp driver data allocated for the firstcpu, like the way they share
> struct cpufreq_policy{}. We also make it a union, with "hwp", and later
> "amd-cppc" as a sub-struct.
And ACPI, as per my patch (which then will need re-basing).
> Suggested-by: Jan Beulich <jbeulich@suse.com>
Not quite, this really is Reported-by: as it's a bug you fix, and in turn it
also wants to gain a Fixes: tag. This also will need backporting.
It would also have been nice if you had Cc-ed Jason right away, seeing that
this code was all written by him.
> @@ -259,7 +258,7 @@ static int cf_check hwp_cpufreq_target(struct cpufreq_policy *policy,
> unsigned int relation)
> {
> unsigned int cpu = policy->cpu;
> - struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
> + struct hwp_drv_data *data = policy->u.hwp;
> /* Zero everything to ensure reserved bits are zero... */
> union hwp_request hwp_req = { .raw = 0 };
Further down in this same function we have
on_selected_cpus(cpumask_of(cpu), hwp_write_request, policy, 1);
That's similarly problematic when the CPU denoted by policy->cpu isn't
online anymore. (It's not quite clear whether all related issues would
want fixing together, or in multiple patches.)
> @@ -350,7 +349,7 @@ static void hwp_get_cpu_speeds(struct cpufreq_policy *policy)
> static void cf_check hwp_init_msrs(void *info)
> {
> struct cpufreq_policy *policy = info;
> - struct hwp_drv_data *data = this_cpu(hwp_drv_data);
> + struct hwp_drv_data *data = policy->u.hwp;
> uint64_t val;
>
> /*
> @@ -426,15 +425,14 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
>
> policy->governor = &cpufreq_gov_hwp;
>
> - per_cpu(hwp_drv_data, cpu) = data;
> + policy->u.hwp = data;
>
> on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1);
If multiple CPUs are in a domain, not all of them will make it here. By
implication the MSRs accessed by hwp_init_msrs() would need to have wider
than thread scope. The SDM, afaics, says nothing either way in this regard
in the Architectural MSRs section. Later model-specific tables have some
data.
Which gets me back to my original question: Is "sharing" actually possible
for HWP? Note further how there are both HWP_REQUEST and HWP_REQUEST_PKG
MSRs, for example. Which one is (to be) used looks to be controlled by
HWP_CTL.PKG_CTL_POLARITY.
> @@ -462,10 +460,8 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
>
> static int cf_check hwp_cpufreq_cpu_exit(struct cpufreq_policy *policy)
> {
> - struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
> -
> - per_cpu(hwp_drv_data, policy->cpu) = NULL;
> - xfree(data);
> + if ( policy->u.hwp )
> + XFREE(policy->u.hwp);
No if() needed here.
> @@ -480,7 +476,7 @@ static int cf_check hwp_cpufreq_cpu_exit(struct cpufreq_policy *policy)
> static void cf_check hwp_set_misc_turbo(void *info)
> {
> const struct cpufreq_policy *policy = info;
> - struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
> + struct hwp_drv_data *data = policy->u.hwp;
> uint64_t msr;
>
> data->ret = 0;
> @@ -511,7 +507,7 @@ static int cf_check hwp_cpufreq_update(unsigned int cpu, struct cpufreq_policy *
> {
> on_selected_cpus(cpumask_of(cpu), hwp_set_misc_turbo, policy, 1);
>
> - return per_cpu(hwp_drv_data, cpu)->ret;
> + return policy->u.hwp->ret;
> }
> #endif /* CONFIG_PM_OP */
Same concern here wrt MSR scope. MISC_ENABLE.TURBO_DISENGAGE's scope is
package as per the few tables which have the bit explicitly explained;
whether that extends to all models is unclear.
> --- a/xen/include/acpi/cpufreq/cpufreq.h
> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> @@ -62,6 +62,7 @@ struct perf_limits {
> uint32_t min_policy_pct;
> };
>
> +struct hwp_drv_data;
This shouldn't be needed.
> @@ -81,6 +82,11 @@ struct cpufreq_policy {
> int8_t turbo; /* tristate flag: 0 for unsupported
> * -1 for disable, 1 for enabled
> * See CPUFREQ_TURBO_* below for defines */
> + union {
> +#ifdef CONFIG_INTEL
> + struct hwp_drv_data *hwp; /* Driver data for Intel HWP */
> +#endif
While it may make for a smaller diff, ultimately I think we don't want
this to be a pointer, much like I've done in my patch for the ACPI driver.
> + } u;
This wants to either not have a name at all, or be named e.g. drv_data.
Jan
On 2025-09-04 07:50, Jan Beulich wrote:
> On 04.09.2025 08:35, Penny Zheng wrote:
>> For cpus sharing one cpufreq domain, cpufreq_driver.init() is
>> only invoked on the firstcpu, so current per-CPU hwp driver data
>> struct hwp_drv_data{} actually fails to be allocated for cpus other than the
>> first one.
>> There is no need to make it per-CPU.>> We embed struct
hwp_drv_data{} into struct cpufreq_policy{}, then cpus could
>> share the hwp driver data allocated for the firstcpu, like the way they share
>> struct cpufreq_policy{}. We also make it a union, with "hwp", and later
>> "amd-cppc" as a sub-struct.
>
> And ACPI, as per my patch (which then will need re-basing).
>
>> Suggested-by: Jan Beulich <jbeulich@suse.com>
>
> Not quite, this really is Reported-by: as it's a bug you fix, and in turn it
> also wants to gain a Fixes: tag. This also will need backporting.
>
> It would also have been nice if you had Cc-ed Jason right away, seeing that
> this code was all written by him.
>
>> @@ -259,7 +258,7 @@ static int cf_check hwp_cpufreq_target(struct cpufreq_policy *policy,
>> unsigned int relation)
>> {
>> unsigned int cpu = policy->cpu;
>> - struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
>> + struct hwp_drv_data *data = policy->u.hwp;
>> /* Zero everything to ensure reserved bits are zero... */
>> union hwp_request hwp_req = { .raw = 0 };
>
> Further down in this same function we have
>
> on_selected_cpus(cpumask_of(cpu), hwp_write_request, policy, 1);
>
> That's similarly problematic when the CPU denoted by policy->cpu isn't
> online anymore. (It's not quite clear whether all related issues would
> want fixing together, or in multiple patches.)
>
>> @@ -350,7 +349,7 @@ static void hwp_get_cpu_speeds(struct cpufreq_policy *policy)
>> static void cf_check hwp_init_msrs(void *info)
>> {
>> struct cpufreq_policy *policy = info;
>> - struct hwp_drv_data *data = this_cpu(hwp_drv_data);
>> + struct hwp_drv_data *data = policy->u.hwp;
>> uint64_t val;
>>
>> /*
>> @@ -426,15 +425,14 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
>>
>> policy->governor = &cpufreq_gov_hwp;
>>
>> - per_cpu(hwp_drv_data, cpu) = data;
>> + policy->u.hwp = data;
>>
>> on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1);
>
> If multiple CPUs are in a domain, not all of them will make it here. By
> implication the MSRs accessed by hwp_init_msrs() would need to have wider
> than thread scope. The SDM, afaics, says nothing either way in this regard
> in the Architectural MSRs section. Later model-specific tables have some
> data.
When I wrote the HWP driver, I expected there to be per-cpu
hwp_drv_data. policy->cpu looked like the correct way to identify each
CPU. I was unaware of the idea of cpufreq_domains, and didn't intend
there to be any sharing.
> Which gets me back to my original question: Is "sharing" actually possible
> for HWP? Note further how there are both HWP_REQUEST and HWP_REQUEST_PKG
> MSRs, for example. Which one is (to be) used looks to be controlled by
> HWP_CTL.PKG_CTL_POLARITY.
I was aware of the Package Level MSRs, but chose not to support them.
Topology information didn't seem readily available to the driver, and
using non-Package Level MSRs is needed for backwards compatibility anyway.
I don't have access to an HWP system, so I cannot check if processors
share a domain. I'd feel a little silly if I only ever wrote to CPU 0 :/
I have no proof, but I want to say that at some point I had debug
statements and saw hwp_cpufreq_target() called for each CPU.
Maybe forcing hw_all=1 in cpufreq_add_cpu()/cpufreq_del_cpu() would
ensure per-cpu policies?
Regards,
Jason
On 04.09.2025 20:53, Jason Andryuk wrote:
> On 2025-09-04 07:50, Jan Beulich wrote:
>> On 04.09.2025 08:35, Penny Zheng wrote:
>>> @@ -259,7 +258,7 @@ static int cf_check hwp_cpufreq_target(struct cpufreq_policy *policy,
>>> unsigned int relation)
>>> {
>>> unsigned int cpu = policy->cpu;
>>> - struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
>>> + struct hwp_drv_data *data = policy->u.hwp;
>>> /* Zero everything to ensure reserved bits are zero... */
>>> union hwp_request hwp_req = { .raw = 0 };
>>
>> Further down in this same function we have
>>
>> on_selected_cpus(cpumask_of(cpu), hwp_write_request, policy, 1);
>>
>> That's similarly problematic when the CPU denoted by policy->cpu isn't
>> online anymore. (It's not quite clear whether all related issues would
>> want fixing together, or in multiple patches.)
>>
>>> @@ -350,7 +349,7 @@ static void hwp_get_cpu_speeds(struct cpufreq_policy *policy)
>>> static void cf_check hwp_init_msrs(void *info)
>>> {
>>> struct cpufreq_policy *policy = info;
>>> - struct hwp_drv_data *data = this_cpu(hwp_drv_data);
>>> + struct hwp_drv_data *data = policy->u.hwp;
>>> uint64_t val;
>>>
>>> /*
>>> @@ -426,15 +425,14 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
>>>
>>> policy->governor = &cpufreq_gov_hwp;
>>>
>>> - per_cpu(hwp_drv_data, cpu) = data;
>>> + policy->u.hwp = data;
>>>
>>> on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1);
>>
>> If multiple CPUs are in a domain, not all of them will make it here. By
>> implication the MSRs accessed by hwp_init_msrs() would need to have wider
>> than thread scope. The SDM, afaics, says nothing either way in this regard
>> in the Architectural MSRs section. Later model-specific tables have some
>> data.
>
> When I wrote the HWP driver, I expected there to be per-cpu
> hwp_drv_data. policy->cpu looked like the correct way to identify each
> CPU. I was unaware of the idea of cpufreq_domains, and didn't intend
> there to be any sharing.
>
>> Which gets me back to my original question: Is "sharing" actually possible
>> for HWP? Note further how there are both HWP_REQUEST and HWP_REQUEST_PKG
>> MSRs, for example. Which one is (to be) used looks to be controlled by
>> HWP_CTL.PKG_CTL_POLARITY.
>
> I was aware of the Package Level MSRs, but chose not to support them.
> Topology information didn't seem readily available to the driver, and
> using non-Package Level MSRs is needed for backwards compatibility anyway.
>
> I don't have access to an HWP system, so I cannot check if processors
> share a domain. I'd feel a little silly if I only ever wrote to CPU 0 :/
>
> I have no proof, but I want to say that at some point I had debug
> statements and saw hwp_cpufreq_target() called for each CPU.
>
> Maybe forcing hw_all=1 in cpufreq_add_cpu()/cpufreq_del_cpu() would
> ensure per-cpu policies?
No, domain info is handed to us from ACPI (_PSD). That's what
cpufreq_add_cpu() evaluates. Therefore if there was evidence that HWP (and
CPPC) can only ever have single-CPU domains, we could refuse such _PSD
being handed to us (ideally already in set_px_pminfo()). But I don't think
we can just go and override what we were told. I fear though that the mere
existence of a package-level (alternative) MSR suggests that such
configurations might be possible.
Jan
© 2016 - 2026 Red Hat, Inc.