For cpus sharing one cpufreq domain, cpufreq_driver.init() is
only invoked on the firstcpu, so current per-CPU hwp driver data
struct hwp_drv_data{} actually fails to be allocated for cpus other than the
first one. There is no need to make it per-CPU.
We embed struct hwp_drv_data{} into struct cpufreq_policy{}, then cpus could
share the hwp driver data allocated for the firstcpu, like the way they share
struct cpufreq_policy{}. We also make it a union, with "hwp", and later
"amd-cppc" as a sub-struct.
Suggested-by: Jan Beulich <jbeulich@suse.com>
Signed-off-by: Penny Zheng <Penny.Zheng@amd.com>
---
v8 -> v9:
- new commit
---
xen/arch/x86/acpi/cpufreq/hwp.c | 32 +++++++++++++-----------------
xen/include/acpi/cpufreq/cpufreq.h | 6 ++++++
2 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/xen/arch/x86/acpi/cpufreq/hwp.c b/xen/arch/x86/acpi/cpufreq/hwp.c
index 240491c96a..5c98f3eb3e 100644
--- a/xen/arch/x86/acpi/cpufreq/hwp.c
+++ b/xen/arch/x86/acpi/cpufreq/hwp.c
@@ -67,7 +67,6 @@ struct hwp_drv_data
uint8_t desired;
uint8_t energy_perf;
};
-static DEFINE_PER_CPU_READ_MOSTLY(struct hwp_drv_data *, hwp_drv_data);
#define hwp_err(cpu, fmt, args...) \
printk(XENLOG_ERR "HWP: CPU%u error: " fmt, cpu, ## args)
@@ -224,7 +223,7 @@ static bool __init hwp_available(void)
static int cf_check hwp_cpufreq_verify(struct cpufreq_policy *policy)
{
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
+ struct hwp_drv_data *data = policy->u.hwp;
if ( !feature_hwp_activity_window && data->activity_window )
{
@@ -239,7 +238,7 @@ static int cf_check hwp_cpufreq_verify(struct cpufreq_policy *policy)
static void cf_check hwp_write_request(void *info)
{
const struct cpufreq_policy *policy = info;
- struct hwp_drv_data *data = this_cpu(hwp_drv_data);
+ struct hwp_drv_data *data = policy->u.hwp;
union hwp_request hwp_req = data->curr_req;
data->ret = 0;
@@ -259,7 +258,7 @@ static int cf_check hwp_cpufreq_target(struct cpufreq_policy *policy,
unsigned int relation)
{
unsigned int cpu = policy->cpu;
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+ struct hwp_drv_data *data = policy->u.hwp;
/* Zero everything to ensure reserved bits are zero... */
union hwp_request hwp_req = { .raw = 0 };
@@ -350,7 +349,7 @@ static void hwp_get_cpu_speeds(struct cpufreq_policy *policy)
static void cf_check hwp_init_msrs(void *info)
{
struct cpufreq_policy *policy = info;
- struct hwp_drv_data *data = this_cpu(hwp_drv_data);
+ struct hwp_drv_data *data = policy->u.hwp;
uint64_t val;
/*
@@ -426,15 +425,14 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
policy->governor = &cpufreq_gov_hwp;
- per_cpu(hwp_drv_data, cpu) = data;
+ policy->u.hwp = data;
on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1);
if ( data->curr_req.raw == -1 )
{
hwp_err(cpu, "Could not initialize HWP properly\n");
- per_cpu(hwp_drv_data, cpu) = NULL;
- xfree(data);
+ XFREE(policy->u.hwp);
return -ENODEV;
}
@@ -462,10 +460,8 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy)
static int cf_check hwp_cpufreq_cpu_exit(struct cpufreq_policy *policy)
{
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
-
- per_cpu(hwp_drv_data, policy->cpu) = NULL;
- xfree(data);
+ if ( policy->u.hwp )
+ XFREE(policy->u.hwp);
return 0;
}
@@ -480,7 +476,7 @@ static int cf_check hwp_cpufreq_cpu_exit(struct cpufreq_policy *policy)
static void cf_check hwp_set_misc_turbo(void *info)
{
const struct cpufreq_policy *policy = info;
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu);
+ struct hwp_drv_data *data = policy->u.hwp;
uint64_t msr;
data->ret = 0;
@@ -511,7 +507,7 @@ static int cf_check hwp_cpufreq_update(unsigned int cpu, struct cpufreq_policy *
{
on_selected_cpus(cpumask_of(cpu), hwp_set_misc_turbo, policy, 1);
- return per_cpu(hwp_drv_data, cpu)->ret;
+ return policy->u.hwp->ret;
}
#endif /* CONFIG_PM_OP */
@@ -531,9 +527,10 @@ hwp_cpufreq_driver = {
int get_hwp_para(unsigned int cpu,
struct xen_get_cppc_para *cppc_para)
{
- const struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+ const struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_policy, cpu);
+ const struct hwp_drv_data *data;
- if ( data == NULL )
+ if ( !policy || !(data = policy->u.hwp) )
return -ENODATA;
cppc_para->features =
@@ -554,8 +551,7 @@ int get_hwp_para(unsigned int cpu,
int set_hwp_para(struct cpufreq_policy *policy,
struct xen_set_cppc_para *set_cppc)
{
- unsigned int cpu = policy->cpu;
- struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu);
+ struct hwp_drv_data *data = policy->u.hwp;
bool cleared_act_window = false;
if ( data == NULL )
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 5d4881eea8..c0ecd690c5 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -62,6 +62,7 @@ struct perf_limits {
uint32_t min_policy_pct;
};
+struct hwp_drv_data;
struct cpufreq_policy {
cpumask_var_t cpus; /* affected CPUs */
unsigned int shared_type; /* ANY or ALL affected CPUs
@@ -81,6 +82,11 @@ struct cpufreq_policy {
int8_t turbo; /* tristate flag: 0 for unsupported
* -1 for disable, 1 for enabled
* See CPUFREQ_TURBO_* below for defines */
+ union {
+#ifdef CONFIG_INTEL
+ struct hwp_drv_data *hwp; /* Driver data for Intel HWP */
+#endif
+ } u;
};
DECLARE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_policy);
--
2.34.1
On 04.09.2025 08:35, Penny Zheng wrote: > For cpus sharing one cpufreq domain, cpufreq_driver.init() is > only invoked on the firstcpu, so current per-CPU hwp driver data > struct hwp_drv_data{} actually fails to be allocated for cpus other than the > first one. There is no need to make it per-CPU. > We embed struct hwp_drv_data{} into struct cpufreq_policy{}, then cpus could > share the hwp driver data allocated for the firstcpu, like the way they share > struct cpufreq_policy{}. We also make it a union, with "hwp", and later > "amd-cppc" as a sub-struct. And ACPI, as per my patch (which then will need re-basing). > Suggested-by: Jan Beulich <jbeulich@suse.com> Not quite, this really is Reported-by: as it's a bug you fix, and in turn it also wants to gain a Fixes: tag. This also will need backporting. It would also have been nice if you had Cc-ed Jason right away, seeing that this code was all written by him. > @@ -259,7 +258,7 @@ static int cf_check hwp_cpufreq_target(struct cpufreq_policy *policy, > unsigned int relation) > { > unsigned int cpu = policy->cpu; > - struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu); > + struct hwp_drv_data *data = policy->u.hwp; > /* Zero everything to ensure reserved bits are zero... */ > union hwp_request hwp_req = { .raw = 0 }; Further down in this same function we have on_selected_cpus(cpumask_of(cpu), hwp_write_request, policy, 1); That's similarly problematic when the CPU denoted by policy->cpu isn't online anymore. (It's not quite clear whether all related issues would want fixing together, or in multiple patches.) > @@ -350,7 +349,7 @@ static void hwp_get_cpu_speeds(struct cpufreq_policy *policy) > static void cf_check hwp_init_msrs(void *info) > { > struct cpufreq_policy *policy = info; > - struct hwp_drv_data *data = this_cpu(hwp_drv_data); > + struct hwp_drv_data *data = policy->u.hwp; > uint64_t val; > > /* > @@ -426,15 +425,14 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy) > > policy->governor = &cpufreq_gov_hwp; > > - per_cpu(hwp_drv_data, cpu) = data; > + policy->u.hwp = data; > > on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1); If multiple CPUs are in a domain, not all of them will make it here. By implication the MSRs accessed by hwp_init_msrs() would need to have wider than thread scope. The SDM, afaics, says nothing either way in this regard in the Architectural MSRs section. Later model-specific tables have some data. Which gets me back to my original question: Is "sharing" actually possible for HWP? Note further how there are both HWP_REQUEST and HWP_REQUEST_PKG MSRs, for example. Which one is (to be) used looks to be controlled by HWP_CTL.PKG_CTL_POLARITY. > @@ -462,10 +460,8 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy) > > static int cf_check hwp_cpufreq_cpu_exit(struct cpufreq_policy *policy) > { > - struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu); > - > - per_cpu(hwp_drv_data, policy->cpu) = NULL; > - xfree(data); > + if ( policy->u.hwp ) > + XFREE(policy->u.hwp); No if() needed here. > @@ -480,7 +476,7 @@ static int cf_check hwp_cpufreq_cpu_exit(struct cpufreq_policy *policy) > static void cf_check hwp_set_misc_turbo(void *info) > { > const struct cpufreq_policy *policy = info; > - struct hwp_drv_data *data = per_cpu(hwp_drv_data, policy->cpu); > + struct hwp_drv_data *data = policy->u.hwp; > uint64_t msr; > > data->ret = 0; > @@ -511,7 +507,7 @@ static int cf_check hwp_cpufreq_update(unsigned int cpu, struct cpufreq_policy * > { > on_selected_cpus(cpumask_of(cpu), hwp_set_misc_turbo, policy, 1); > > - return per_cpu(hwp_drv_data, cpu)->ret; > + return policy->u.hwp->ret; > } > #endif /* CONFIG_PM_OP */ Same concern here wrt MSR scope. MISC_ENABLE.TURBO_DISENGAGE's scope is package as per the few tables which have the bit explicitly explained; whether that extends to all models is unclear. > --- a/xen/include/acpi/cpufreq/cpufreq.h > +++ b/xen/include/acpi/cpufreq/cpufreq.h > @@ -62,6 +62,7 @@ struct perf_limits { > uint32_t min_policy_pct; > }; > > +struct hwp_drv_data; This shouldn't be needed. > @@ -81,6 +82,11 @@ struct cpufreq_policy { > int8_t turbo; /* tristate flag: 0 for unsupported > * -1 for disable, 1 for enabled > * See CPUFREQ_TURBO_* below for defines */ > + union { > +#ifdef CONFIG_INTEL > + struct hwp_drv_data *hwp; /* Driver data for Intel HWP */ > +#endif While it may make for a smaller diff, ultimately I think we don't want this to be a pointer, much like I've done in my patch for the ACPI driver. > + } u; This wants to either not have a name at all, or be named e.g. drv_data. Jan
On 2025-09-04 07:50, Jan Beulich wrote: > On 04.09.2025 08:35, Penny Zheng wrote: >> For cpus sharing one cpufreq domain, cpufreq_driver.init() is >> only invoked on the firstcpu, so current per-CPU hwp driver data >> struct hwp_drv_data{} actually fails to be allocated for cpus other than the >> first one. >> There is no need to make it per-CPU.>> We embed struct hwp_drv_data{} into struct cpufreq_policy{}, then cpus could >> share the hwp driver data allocated for the firstcpu, like the way they share >> struct cpufreq_policy{}. We also make it a union, with "hwp", and later >> "amd-cppc" as a sub-struct. > > And ACPI, as per my patch (which then will need re-basing). > >> Suggested-by: Jan Beulich <jbeulich@suse.com> > > Not quite, this really is Reported-by: as it's a bug you fix, and in turn it > also wants to gain a Fixes: tag. This also will need backporting. > > It would also have been nice if you had Cc-ed Jason right away, seeing that > this code was all written by him. > >> @@ -259,7 +258,7 @@ static int cf_check hwp_cpufreq_target(struct cpufreq_policy *policy, >> unsigned int relation) >> { >> unsigned int cpu = policy->cpu; >> - struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu); >> + struct hwp_drv_data *data = policy->u.hwp; >> /* Zero everything to ensure reserved bits are zero... */ >> union hwp_request hwp_req = { .raw = 0 }; > > Further down in this same function we have > > on_selected_cpus(cpumask_of(cpu), hwp_write_request, policy, 1); > > That's similarly problematic when the CPU denoted by policy->cpu isn't > online anymore. (It's not quite clear whether all related issues would > want fixing together, or in multiple patches.) > >> @@ -350,7 +349,7 @@ static void hwp_get_cpu_speeds(struct cpufreq_policy *policy) >> static void cf_check hwp_init_msrs(void *info) >> { >> struct cpufreq_policy *policy = info; >> - struct hwp_drv_data *data = this_cpu(hwp_drv_data); >> + struct hwp_drv_data *data = policy->u.hwp; >> uint64_t val; >> >> /* >> @@ -426,15 +425,14 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy) >> >> policy->governor = &cpufreq_gov_hwp; >> >> - per_cpu(hwp_drv_data, cpu) = data; >> + policy->u.hwp = data; >> >> on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1); > > If multiple CPUs are in a domain, not all of them will make it here. By > implication the MSRs accessed by hwp_init_msrs() would need to have wider > than thread scope. The SDM, afaics, says nothing either way in this regard > in the Architectural MSRs section. Later model-specific tables have some > data. When I wrote the HWP driver, I expected there to be per-cpu hwp_drv_data. policy->cpu looked like the correct way to identify each CPU. I was unaware of the idea of cpufreq_domains, and didn't intend there to be any sharing. > Which gets me back to my original question: Is "sharing" actually possible > for HWP? Note further how there are both HWP_REQUEST and HWP_REQUEST_PKG > MSRs, for example. Which one is (to be) used looks to be controlled by > HWP_CTL.PKG_CTL_POLARITY. I was aware of the Package Level MSRs, but chose not to support them. Topology information didn't seem readily available to the driver, and using non-Package Level MSRs is needed for backwards compatibility anyway. I don't have access to an HWP system, so I cannot check if processors share a domain. I'd feel a little silly if I only ever wrote to CPU 0 :/ I have no proof, but I want to say that at some point I had debug statements and saw hwp_cpufreq_target() called for each CPU. Maybe forcing hw_all=1 in cpufreq_add_cpu()/cpufreq_del_cpu() would ensure per-cpu policies? Regards, Jason
On 04.09.2025 20:53, Jason Andryuk wrote: > On 2025-09-04 07:50, Jan Beulich wrote: >> On 04.09.2025 08:35, Penny Zheng wrote: >>> @@ -259,7 +258,7 @@ static int cf_check hwp_cpufreq_target(struct cpufreq_policy *policy, >>> unsigned int relation) >>> { >>> unsigned int cpu = policy->cpu; >>> - struct hwp_drv_data *data = per_cpu(hwp_drv_data, cpu); >>> + struct hwp_drv_data *data = policy->u.hwp; >>> /* Zero everything to ensure reserved bits are zero... */ >>> union hwp_request hwp_req = { .raw = 0 }; >> >> Further down in this same function we have >> >> on_selected_cpus(cpumask_of(cpu), hwp_write_request, policy, 1); >> >> That's similarly problematic when the CPU denoted by policy->cpu isn't >> online anymore. (It's not quite clear whether all related issues would >> want fixing together, or in multiple patches.) >> >>> @@ -350,7 +349,7 @@ static void hwp_get_cpu_speeds(struct cpufreq_policy *policy) >>> static void cf_check hwp_init_msrs(void *info) >>> { >>> struct cpufreq_policy *policy = info; >>> - struct hwp_drv_data *data = this_cpu(hwp_drv_data); >>> + struct hwp_drv_data *data = policy->u.hwp; >>> uint64_t val; >>> >>> /* >>> @@ -426,15 +425,14 @@ static int cf_check hwp_cpufreq_cpu_init(struct cpufreq_policy *policy) >>> >>> policy->governor = &cpufreq_gov_hwp; >>> >>> - per_cpu(hwp_drv_data, cpu) = data; >>> + policy->u.hwp = data; >>> >>> on_selected_cpus(cpumask_of(cpu), hwp_init_msrs, policy, 1); >> >> If multiple CPUs are in a domain, not all of them will make it here. By >> implication the MSRs accessed by hwp_init_msrs() would need to have wider >> than thread scope. The SDM, afaics, says nothing either way in this regard >> in the Architectural MSRs section. Later model-specific tables have some >> data. > > When I wrote the HWP driver, I expected there to be per-cpu > hwp_drv_data. policy->cpu looked like the correct way to identify each > CPU. I was unaware of the idea of cpufreq_domains, and didn't intend > there to be any sharing. > >> Which gets me back to my original question: Is "sharing" actually possible >> for HWP? Note further how there are both HWP_REQUEST and HWP_REQUEST_PKG >> MSRs, for example. Which one is (to be) used looks to be controlled by >> HWP_CTL.PKG_CTL_POLARITY. > > I was aware of the Package Level MSRs, but chose not to support them. > Topology information didn't seem readily available to the driver, and > using non-Package Level MSRs is needed for backwards compatibility anyway. > > I don't have access to an HWP system, so I cannot check if processors > share a domain. I'd feel a little silly if I only ever wrote to CPU 0 :/ > > I have no proof, but I want to say that at some point I had debug > statements and saw hwp_cpufreq_target() called for each CPU. > > Maybe forcing hw_all=1 in cpufreq_add_cpu()/cpufreq_del_cpu() would > ensure per-cpu policies? No, domain info is handed to us from ACPI (_PSD). That's what cpufreq_add_cpu() evaluates. Therefore if there was evidence that HWP (and CPPC) can only ever have single-CPU domains, we could refuse such _PSD being handed to us (ideally already in set_px_pminfo()). But I don't think we can just go and override what we were told. I fear though that the mere existence of a package-level (alternative) MSR suggests that such configurations might be possible. Jan
© 2016 - 2025 Red Hat, Inc.