When working on dynamic asym priority support, it was observed that
"asym_prefer_cpu" on AMD systems supporting Preferred Core ranking
was always the first CPU in the sched group after boot even though it
was not the CPU with the highest asym priority.
"asym_prefer_cpu" is cached when the sched domain hierarchy is
constructed. On AMD systems that support Preferred Core rankings, sched
domains are rebuilt when ITMT support is enabled for the first time from
amd_pstate*_cpu_init().
Since amd_pstate*_cpu_init() is called to initialize the cpudata for
each CPU, ITMT support is enabled right after the first CPU initializes
its asym priority. This is too early: the sched domains are rebuilt when
ITMT support is toggled on for the first time, while the remaining CPUs
have not yet initialized their asym priorities.
Initialize the asym priorities first in amd_pstate*_cpu_init() and
enable ITMT support only after amd_pstate_register_driver() has
finished, ensuring all CPUs have correctly initialized their asym
priorities before the sched domain hierarchy is rebuilt and
"asym_prefer_cpu" is cached.
Remove the delayed work mechanism now that ITMT support is only toggled
from the driver init path which is outside the cpuhp critical section.
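To illustrate, a simplified sketch of the resulting ordering (not the
actual code paths; the cpufreq core drives the per-CPU ->init()
callbacks):

    /* per CPU, from amd_pstate*_cpu_init() via the cpufreq core */
    amd_pstate_init_asym_prio(cpudata);    /* sched_set_itmt_core_prio() */

    /* once, from amd_pstate_init(), after driver registration succeeds */
    if (amd_pstate_prefcore)
            sched_set_itmt_support();      /* single sched domain rebuild */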
Fixes: f3a052391822 ("cpufreq: amd-pstate: Enable amd-pstate preferred core support")
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
drivers/cpufreq/amd-pstate.c | 28 ++++++++--------------------
1 file changed, 8 insertions(+), 20 deletions(-)
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index c54c031939c8..ee638589f5f9 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -794,19 +794,9 @@ static void amd_perf_ctl_reset(unsigned int cpu)
wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
}
-/*
- * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
- * due to locking, so queue the work for later.
- */
-static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
-{
- sched_set_itmt_support();
-}
-static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
-
#define CPPC_MAX_PERF U8_MAX
-static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
+static void amd_pstate_init_asym_prio(struct amd_cpudata *cpudata)
{
/* user disabled or not detected */
if (!amd_pstate_prefcore)
@@ -814,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
cpudata->hw_prefcore = true;
- /*
- * The priorities can be set regardless of whether or not
- * sched_set_itmt_support(true) has been called and it is valid to
- * update them at any time after it has been called.
- */
+ /* The priorities must be initialized before ITMT support can be toggled on. */
sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
-
- schedule_work(&sched_prefcore_work);
}
static void amd_pstate_update_limits(unsigned int cpu)
@@ -974,7 +958,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
if (ret)
goto free_cpudata1;
- amd_pstate_init_prefcore(cpudata);
+ amd_pstate_init_asym_prio(cpudata);
ret = amd_pstate_init_freq(cpudata);
if (ret)
@@ -1450,7 +1434,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
if (ret)
goto free_cpudata1;
- amd_pstate_init_prefcore(cpudata);
+ amd_pstate_init_asym_prio(cpudata);
ret = amd_pstate_init_freq(cpudata);
if (ret)
@@ -1780,6 +1764,10 @@ static int __init amd_pstate_init(void)
}
}
+ /* Enable ITMT support once all CPUs have initialized their asym priorities. */
+ if (amd_pstate_prefcore)
+ sched_set_itmt_support();
+
return ret;
global_attr_free:
base-commit: 56a49e19e1aea1374e9ba58cfd40260587bb7355
--
2.34.1
On 4/8/2025 10:00 PM, K Prateek Nayak wrote:
> When working on dynamic asym priority support, it was observed that
> "asym_prefer_cpu" on AMD systems supporting Preferred Core ranking
> was always the first CPU in the sched group after boot even though it
> was not the CPU with the highest asym priority.
>
> "asym_prefer_cpu" is cached when the sched domain hierarchy is
> constructed. On AMD systems that support Preferred Core rankings, sched
> domains are rebuilt when ITMT support is enabled for the first time from
> amd_pstate*_cpu_init().
>
> Since amd_pstate*_cpu_init() is called to initialize the cpudata for
> each CPU, ITMT support is enabled right after the first CPU initializes
> its asym priority. This is too early: the sched domains are rebuilt when
> ITMT support is toggled on for the first time, while the remaining CPUs
> have not yet initialized their asym priorities.
>
> Initialize the asym priorities first in amd_pstate*_cpu_init() and
> enable ITMT support only after amd_pstate_register_driver() has
> finished, ensuring all CPUs have correctly initialized their asym
> priorities before the sched domain hierarchy is rebuilt and
> "asym_prefer_cpu" is cached.
>
> Remove the delayed work mechanism now that ITMT support is only toggled
> from the driver init path which is outside the cpuhp critical section.
>
> Fixes: f3a052391822 ("cpufreq: amd-pstate: Enable amd-pstate preferred core support")
> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
> ---
> drivers/cpufreq/amd-pstate.c | 28 ++++++++--------------------
> 1 file changed, 8 insertions(+), 20 deletions(-)
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index c54c031939c8..ee638589f5f9 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -794,19 +794,9 @@ static void amd_perf_ctl_reset(unsigned int cpu)
> wrmsrl_on_cpu(cpu, MSR_AMD_PERF_CTL, 0);
> }
>
> -/*
> - * Set amd-pstate preferred core enable can't be done directly from cpufreq callbacks
> - * due to locking, so queue the work for later.
> - */
> -static void amd_pstste_sched_prefcore_workfn(struct work_struct *work)
> -{
> - sched_set_itmt_support();
> -}
> -static DECLARE_WORK(sched_prefcore_work, amd_pstste_sched_prefcore_workfn);
> -
> #define CPPC_MAX_PERF U8_MAX
>
> -static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
> +static void amd_pstate_init_asym_prio(struct amd_cpudata *cpudata)
I think the previous function name was fine.
1) It still does set cpudata->hw_prefcore after all, and
2) We still have an amd_detect_prefcore() that is used to determine
whether amd_pstate_prefcore is set.
> {
> /* user disabled or not detected */
> if (!amd_pstate_prefcore)
> @@ -814,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
>
> cpudata->hw_prefcore = true;
>
> - /*
> - * The priorities can be set regardless of whether or not
> - * sched_set_itmt_support(true) has been called and it is valid to
> - * update them at any time after it has been called.
> - */
> + /* The priorities must be initialized before ITMT support can be toggled on. */
> sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
> -
> - schedule_work(&sched_prefcore_work);
> }
>
> static void amd_pstate_update_limits(unsigned int cpu)
> @@ -974,7 +958,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
> if (ret)
> goto free_cpudata1;
>
> - amd_pstate_init_prefcore(cpudata);
> + amd_pstate_init_asym_prio(cpudata);
>
> ret = amd_pstate_init_freq(cpudata);
> if (ret)
> @@ -1450,7 +1434,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
> if (ret)
> goto free_cpudata1;
>
> - amd_pstate_init_prefcore(cpudata);
> + amd_pstate_init_asym_prio(cpudata);
>
> ret = amd_pstate_init_freq(cpudata);
> if (ret)
> @@ -1780,6 +1764,10 @@ static int __init amd_pstate_init(void)
> }
> }
>
> + /* Enable ITMT support once all CPUs have initialized their asym priorities. */
> + if (amd_pstate_prefcore)
> + sched_set_itmt_support();
> +
Hmm, moving it after the first registration has the side effect
that if you change driver modes from active to passive (for example),
ITMT priorities stay identical and aren't updated.
I guess that makes sense since the rankings /shouldn't/ change.
I feel this should be OK, thanks.
> return ret;
>
> global_attr_free:
>
> base-commit: 56a49e19e1aea1374e9ba58cfd40260587bb7355
Hello Mario,
On 4/10/2025 2:28 AM, Mario Limonciello wrote:
[..snip..]
>> #define CPPC_MAX_PERF U8_MAX
>> -static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
>> +static void amd_pstate_init_asym_prio(struct amd_cpudata *cpudata)
>
> I think the previous function name was fine.
>
> 1) It still does set cpudata->hw_prefcore after all, and
> 2) We still have an amd_detect_prefcore() that is used to determine whether amd_pstate_prefcore is set.
Ack. I'll change it back in v2.
>
>> {
>> /* user disabled or not detected */
>> if (!amd_pstate_prefcore)
>> @@ -814,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
>> cpudata->hw_prefcore = true;
>> - /*
>> - * The priorities can be set regardless of whether or not
>> - * sched_set_itmt_support(true) has been called and it is valid to
>> - * update them at any time after it has been called.
>> - */
>> + /* The priorities must be initialized before ITMT support can be toggled on. */
>> sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
>> -
>> - schedule_work(&sched_prefcore_work);
>> }
>> static void amd_pstate_update_limits(unsigned int cpu)
>> @@ -974,7 +958,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>> if (ret)
>> goto free_cpudata1;
>> - amd_pstate_init_prefcore(cpudata);
>> + amd_pstate_init_asym_prio(cpudata);
>> ret = amd_pstate_init_freq(cpudata);
>> if (ret)
>> @@ -1450,7 +1434,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>> if (ret)
>> goto free_cpudata1;
>> - amd_pstate_init_prefcore(cpudata);
>> + amd_pstate_init_asym_prio(cpudata);
>> ret = amd_pstate_init_freq(cpudata);
>> if (ret)
>> @@ -1780,6 +1764,10 @@ static int __init amd_pstate_init(void)
>> }
>> }
>> + /* Enable ITMT support once all CPUs have initialized their asym priorities. */
>> + if (amd_pstate_prefcore)
>> + sched_set_itmt_support();
>> +
>
> Hmm, moving it after the first registration has the side effect that if you change driver modes from active to passive (for example), ITMT priorities stay identical and aren't updated.
> I guess that makes sense since the rankings /shouldn't/ change.
Currently, when amd-pstate unregisters during a mode switch, ITMT remains
enabled and, if the rankings change before the new driver is registered,
update_limits() is never called; that too can cause issues.
Based on a discussion with Dhananjay, and the fact that one can mode
switch to disable amd-pstate completely, what are your thoughts on this
additional diff:
diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 40d908188b78..320b9551947e 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -1177,6 +1177,9 @@ static ssize_t show_energy_performance_preference(
static void amd_pstate_driver_cleanup(void)
{
+ if (amd_pstate_prefcore)
+ sched_clear_itmt_support();
+
cppc_state = AMD_PSTATE_DISABLE;
current_pstate_driver = NULL;
}
@@ -1219,6 +1222,10 @@ static int amd_pstate_register_driver(int mode)
return ret;
}
+ /* Enable ITMT support once all CPUs have initialized their asym priorities. */
+ if (amd_pstate_prefcore)
+ sched_set_itmt_support();
+
return 0;
}
@@ -1761,10 +1768,6 @@ static int __init amd_pstate_init(void)
}
}
- /* Enable ITMT support once all CPUs have initialized their asym priorities. */
- if (amd_pstate_prefcore)
- sched_set_itmt_support();
-
return ret;
global_attr_free:
--
This way, when the new driver registers, it can repopulate the rankings
and then the sched domain rebuild will get everything right. The only
concern is that disabling ITMT support during mode switch will cause the
sched domains to be rebuilt twice but I'm assuming mode switch is a rare
operation.
If disabling and re-enabling ITMT support during mode switch is a
concern, I can work something into my dynamic asym priority support
series to detect any changes to the ranking during mode switch and use
sched_update_asym_prefer_cpu() to update the "asym_prefer_cpu" that way.
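Roughly along these lines (an untested sketch: it assumes the
sched_update_asym_prefer_cpu() interface from that series, and the
amd_pstate_resync_asym_prio() helper name is made up for illustration):

    /*
     * Hypothetical helper: re-read one CPU's ranking on mode switch and,
     * if it changed, refresh the ITMT priority and the cached
     * "asym_prefer_cpu" without tearing ITMT support down.
     */
    static void amd_pstate_resync_asym_prio(struct amd_cpudata *cpudata)
    {
            int old_prio = READ_ONCE(cpudata->prefcore_ranking);
            u32 cur_rank;

            if (amd_get_highest_perf(cpudata->cpu, &cur_rank))
                    return;

            if ((int)cur_rank == old_prio)
                    return;

            WRITE_ONCE(cpudata->prefcore_ranking, cur_rank);
            sched_set_itmt_core_prio((int)cur_rank, cpudata->cpu);
            sched_update_asym_prefer_cpu(cpudata->cpu, old_prio, (int)cur_rank);
    }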
Let me know your thoughts.
>
> I feel this should be OK, thanks.
>
>> return ret;
>> global_attr_free:
>>
>> base-commit: 56a49e19e1aea1374e9ba58cfd40260587bb7355
>
--
Thanks and Regards,
Prateek
On 4/9/2025 11:41 PM, K Prateek Nayak wrote:
> Hello Mario,
>
> On 4/10/2025 2:28 AM, Mario Limonciello wrote:
>
> [..snip..]
>
>>> #define CPPC_MAX_PERF U8_MAX
>>> -static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
>>> +static void amd_pstate_init_asym_prio(struct amd_cpudata *cpudata)
>>
>> I think the previous function name was fine.
>>
>> 1) It still does set cpudata->hw_prefcore after all, and
>> 2) We still have an amd_detect_prefcore() that is used to determine
>> whether amd_pstate_prefcore is set.
>
> Ack. I'll change it back in v2.
>
>>
>>> {
>>> /* user disabled or not detected */
>>> if (!amd_pstate_prefcore)
>>> @@ -814,14 +804,8 @@ static void amd_pstate_init_prefcore(struct amd_cpudata *cpudata)
>>> cpudata->hw_prefcore = true;
>>> - /*
>>> - * The priorities can be set regardless of whether or not
>>> - * sched_set_itmt_support(true) has been called and it is valid to
>>> - * update them at any time after it has been called.
>>> - */
>>> + /* The priorities must be initialized before ITMT support can be toggled on. */
>>> sched_set_itmt_core_prio((int)READ_ONCE(cpudata->prefcore_ranking), cpudata->cpu);
>>> -
>>> - schedule_work(&sched_prefcore_work);
>>> }
>>> static void amd_pstate_update_limits(unsigned int cpu)
>>> @@ -974,7 +958,7 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>>> if (ret)
>>> goto free_cpudata1;
>>> - amd_pstate_init_prefcore(cpudata);
>>> + amd_pstate_init_asym_prio(cpudata);
>>> ret = amd_pstate_init_freq(cpudata);
>>> if (ret)
>>> @@ -1450,7 +1434,7 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>> if (ret)
>>> goto free_cpudata1;
>>> - amd_pstate_init_prefcore(cpudata);
>>> + amd_pstate_init_asym_prio(cpudata);
>>> ret = amd_pstate_init_freq(cpudata);
>>> if (ret)
>>> @@ -1780,6 +1764,10 @@ static int __init amd_pstate_init(void)
>>> }
>>> }
>>> + /* Enable ITMT support once all CPUs have initialized their asym priorities. */
>>> + if (amd_pstate_prefcore)
>>> + sched_set_itmt_support();
>>> +
>>
>> Hmm, moving it after the first registration has the side effect
>> that if you change driver modes from active to passive (for example),
>> ITMT priorities stay identical and aren't updated.
>> I guess that makes sense since the rankings /shouldn't/ change.
>
> Currently, when amd-pstate unregisters during a mode switch, ITMT remains
> enabled and, if the rankings change before the new driver is registered,
> update_limits() is never called; that too can cause issues.
>
> Based on a discussion with Dhananjay, and the fact that one can mode
> switch to disable amd-pstate completely, what are your thoughts on this
> additional diff:
>
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 40d908188b78..320b9551947e 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -1177,6 +1177,9 @@ static ssize_t show_energy_performance_preference(
>
> static void amd_pstate_driver_cleanup(void)
> {
> + if (amd_pstate_prefcore)
> + sched_clear_itmt_support();
> +
> cppc_state = AMD_PSTATE_DISABLE;
> current_pstate_driver = NULL;
> }
> @@ -1219,6 +1222,10 @@ static int amd_pstate_register_driver(int mode)
> return ret;
> }
>
> + /* Enable ITMT support once all CPUs have initialized their asym priorities. */
> + if (amd_pstate_prefcore)
> + sched_set_itmt_support();
> +
> return 0;
> }
>
> @@ -1761,10 +1768,6 @@ static int __init amd_pstate_init(void)
> }
> }
>
> - /* Enable ITMT support once all CPUs have initialized their asym priorities. */
> - if (amd_pstate_prefcore)
> - sched_set_itmt_support();
> -
> return ret;
>
> global_attr_free:
> --
>
> This way, when the new driver registers, it can repopulate the rankings
> and then the sched domain rebuild will get everything right.
Yes; that's *exactly* what I was thinking is needed when I made my comment.
> The only
> concern is that disabling ITMT support during mode switch will cause the
> sched domains to be rebuilt twice but I'm assuming mode switch is a rare
> operation.
Yes; it's not something we expect people to be doing frequently. We
expect most people will like one mode for $REASONS and stick to it,
tuning the knobs 'in the mode' at runtime as they see fit.
So yeah roll that diff into v2 and we should be good.