[PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option

Dhananjay Ugwekar posted 2 patches 7 months, 3 weeks ago
There is a newer version of this series
[PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option
Posted by Dhananjay Ugwekar 7 months, 3 weeks ago
Initialize lower frequency limit to the "Requested CPU Min frequency"
BIOS option (if it is set) value as part of the driver->init()
callback. The BIOS specified value is passed by the PMFW as min_perf in
CPPC_REQ MSR. 

To ensure that we don't mistake a stale min_perf value in CPPC_REQ 
value as the "Requested CPU Min frequency" during a kexec wakeup, reset 
the CPPC_REQ.min_perf value back to the BIOS specified one in the offline,
exit and suspend callbacks. amd_pstate_target() and 
amd_pstate_epp_update_limit() which are invoked as part of the resume() 
and online() callbacks will take care of restoring the CPPC_REQ back to 
the latest sane values.

Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
---
Changes in v2:
* Modify the condition in msr_init_perf to initialize perf.bios_min_perf 
  to 0 by default
* Use READ_ONCE to read cpudata->perf in exit, suspend and offline
  callbacks
---
 drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++++++++++-------
 drivers/cpufreq/amd-pstate.h |  2 ++
 2 files changed, 56 insertions(+), 13 deletions(-)

diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
index 02de51001eba..407fdd31fb0b 100644
--- a/drivers/cpufreq/amd-pstate.c
+++ b/drivers/cpufreq/amd-pstate.c
@@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
 static int msr_init_perf(struct amd_cpudata *cpudata)
 {
 	union perf_cached perf = READ_ONCE(cpudata->perf);
-	u64 cap1, numerator;
+	u64 cap1, numerator, cppc_req;
+	u8 min_perf;
 
 	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
 				     &cap1);
@@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
 	if (ret)
 		return ret;
 
+	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
+	if (ret)
+		return ret;
+
+	WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
+	min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
+
+	/*
+	 * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
+	 * indication that the min_perf value is the one specified through the BIOS option
+	 */
+	cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
+
+	if (!cppc_req)
+		perf.bios_min_perf = min_perf;
+
 	perf.highest_perf = numerator;
 	perf.max_limit_perf = numerator;
 	perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
@@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
 {
 	/*
 	 * Initialize lower frequency limit (i.e.policy->min) with
-	 * lowest_nonlinear_frequency which is the most energy efficient
-	 * frequency. Override the initial value set by cpufreq core and
-	 * amd-pstate qos_requests.
+	 * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
+	 * Override the initial value set by cpufreq core and amd-pstate qos_requests.
 	 */
 	if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
 		struct cpufreq_policy *policy __free(put_cpufreq_policy) =
 					      cpufreq_cpu_get(policy_data->cpu);
 		struct amd_cpudata *cpudata;
+		union perf_cached perf;
 
 		if (!policy)
 			return -EINVAL;
 
 		cpudata = policy->driver_data;
-		policy_data->min = cpudata->lowest_nonlinear_freq;
+		perf = READ_ONCE(cpudata->perf);
+
+		if (perf.bios_min_perf)
+			policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
+							perf.bios_min_perf);
+		else
+			policy_data->min = cpudata->lowest_nonlinear_freq;
 	}
 
 	cpufreq_verify_within_cpu_limits(policy_data);
@@ -1040,6 +1063,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
 static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
+	union perf_cached perf = READ_ONCE(cpudata->perf);
+
+	/* Reset CPPC_REQ MSR to the BIOS value */
+	amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
 
 	freq_qos_remove_request(&cpudata->req[1]);
 	freq_qos_remove_request(&cpudata->req[0]);
@@ -1428,7 +1455,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 	struct amd_cpudata *cpudata;
 	union perf_cached perf;
 	struct device *dev;
-	u64 value;
 	int ret;
 
 	/*
@@ -1493,12 +1519,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
 		cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
 	}
 
-	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
-		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
-		if (ret)
-			return ret;
-		WRITE_ONCE(cpudata->cppc_req_cached, value);
-	}
 	ret = amd_pstate_set_epp(policy, cpudata->epp_default);
 	if (ret)
 		return ret;
@@ -1518,6 +1538,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
 	struct amd_cpudata *cpudata = policy->driver_data;
 
 	if (cpudata) {
+		union perf_cached perf = READ_ONCE(cpudata->perf);
+
+		/* Reset CPPC_REQ MSR to the BIOS value */
+		amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
+
 		kfree(cpudata);
 		policy->driver_data = NULL;
 	}
@@ -1575,12 +1600,28 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
 
 static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
 {
-	return 0;
+	struct amd_cpudata *cpudata = policy->driver_data;
+	union perf_cached perf = READ_ONCE(cpudata->perf);
+
+	/*
+	 * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
+	 * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
+	 * limits, epp and desired perf will get reset to the cached values in cpudata struct
+	 */
+	return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
 }
 
 static int amd_pstate_suspend(struct cpufreq_policy *policy)
 {
 	struct amd_cpudata *cpudata = policy->driver_data;
+	union perf_cached perf = READ_ONCE(cpudata->perf);
+
+	/*
+	 * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
+	 * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
+	 * the limits, epp and desired perf will get reset to the cached values in cpudata struct
+	 */
+	amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
 
 	/* invalidate to ensure it's rewritten during resume */
 	cpudata->cppc_req_cached = 0;
diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
index fbe1c08d3f06..2f7ae364d331 100644
--- a/drivers/cpufreq/amd-pstate.h
+++ b/drivers/cpufreq/amd-pstate.h
@@ -30,6 +30,7 @@
  * @lowest_perf: the absolute lowest performance level of the processor
  * @min_limit_perf: Cached value of the performance corresponding to policy->min
  * @max_limit_perf: Cached value of the performance corresponding to policy->max
+ * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
  */
 union perf_cached {
 	struct {
@@ -39,6 +40,7 @@ union perf_cached {
 		u8	lowest_perf;
 		u8	min_limit_perf;
 		u8	max_limit_perf;
+		u8	bios_min_perf;
 	};
 	u64	val;
 };
-- 
2.34.1
Re: [PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option
Posted by Mario Limonciello 7 months, 3 weeks ago
On 4/21/2025 3:04 AM, Dhananjay Ugwekar wrote:
> Initialize lower frequency limit to the "Requested CPU Min frequency"
> BIOS option (if it is set) value as part of the driver->init()
> callback. The BIOS specified value is passed by the PMFW as min_perf in
> CPPC_REQ MSR.
> 
> To ensure that we don't mistake a stale min_perf value in CPPC_REQ
> value as the "Requested CPU Min frequency" during a kexec wakeup, reset
> the CPPC_REQ.min_perf value back to the BIOS specified one in the offline,
> exit and suspend callbacks. amd_pstate_target() and
> amd_pstate_epp_update_limit() which are invoked as part of the resume()
> and online() callbacks will take care of restoring the CPPC_REQ back to
> the latest sane values.
> 
> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
> ---
> Changes in v2:
> * Modify the condition in msr_init_perf to initialize perf.bios_min_perf
>    to 0 by default
> * Use READ_ONCE to read cpudata->perf in exit, suspend and offline
>    callbacks
> ---
>   drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++++++++++-------
>   drivers/cpufreq/amd-pstate.h |  2 ++
>   2 files changed, 56 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
> index 02de51001eba..407fdd31fb0b 100644
> --- a/drivers/cpufreq/amd-pstate.c
> +++ b/drivers/cpufreq/amd-pstate.c
> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>   static int msr_init_perf(struct amd_cpudata *cpudata)
>   {
>   	union perf_cached perf = READ_ONCE(cpudata->perf);
> -	u64 cap1, numerator;
> +	u64 cap1, numerator, cppc_req;
> +	u8 min_perf;
>   
>   	int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>   				     &cap1);
> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>   	if (ret)
>   		return ret;
>   
> +	ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
> +	if (ret)
> +		return ret;
> +
> +	WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
> +	min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
> +
> +	/*
> +	 * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
> +	 * indication that the min_perf value is the one specified through the BIOS option
> +	 */
> +	cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
> +
> +	if (!cppc_req)
> +		perf.bios_min_perf = min_perf;
> +
>   	perf.highest_perf = numerator;
>   	perf.max_limit_perf = numerator;
>   	perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>   {
>   	/*
>   	 * Initialize lower frequency limit (i.e.policy->min) with
> -	 * lowest_nonlinear_frequency which is the most energy efficient
> -	 * frequency. Override the initial value set by cpufreq core and
> -	 * amd-pstate qos_requests.
> +	 * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
> +	 * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>   	 */
>   	if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>   		struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>   					      cpufreq_cpu_get(policy_data->cpu);
>   		struct amd_cpudata *cpudata;
> +		union perf_cached perf;
>   
>   		if (!policy)
>   			return -EINVAL;
>   
>   		cpudata = policy->driver_data;
> -		policy_data->min = cpudata->lowest_nonlinear_freq;
> +		perf = READ_ONCE(cpudata->perf);
> +
> +		if (perf.bios_min_perf)
> +			policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
> +							perf.bios_min_perf);
> +		else
> +			policy_data->min = cpudata->lowest_nonlinear_freq;
>   	}
>   
>   	cpufreq_verify_within_cpu_limits(policy_data);
> @@ -1040,6 +1063,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>   static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>   {
>   	struct amd_cpudata *cpudata = policy->driver_data;
> +	union perf_cached perf = READ_ONCE(cpudata->perf);
> +
> +	/* Reset CPPC_REQ MSR to the BIOS value */
> +	amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>   
>   	freq_qos_remove_request(&cpudata->req[1]);
>   	freq_qos_remove_request(&cpudata->req[0]);
> @@ -1428,7 +1455,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>   	struct amd_cpudata *cpudata;
>   	union perf_cached perf;
>   	struct device *dev;
> -	u64 value;
>   	int ret;
>   
>   	/*
> @@ -1493,12 +1519,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>   		cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>   	}
>   
> -	if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
> -		ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
> -		if (ret)
> -			return ret;
> -		WRITE_ONCE(cpudata->cppc_req_cached, value);
> -	}
>   	ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>   	if (ret)
>   		return ret;
> @@ -1518,6 +1538,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>   	struct amd_cpudata *cpudata = policy->driver_data;
>   
>   	if (cpudata) {
> +		union perf_cached perf = READ_ONCE(cpudata->perf);
> +
> +		/* Reset CPPC_REQ MSR to the BIOS value */
> +		amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
> +
>   		kfree(cpudata);
>   		policy->driver_data = NULL;
>   	}
> @@ -1575,12 +1600,28 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>   
>   static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>   {
> -	return 0;
> +	struct amd_cpudata *cpudata = policy->driver_data;
> +	union perf_cached perf = READ_ONCE(cpudata->perf);
> +
> +	/*
> +	 * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
> +	 * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
> +	 * limits, epp and desired perf will get reset to the cached values in cpudata struct
> +	 */
> +	return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>   }
>   
>   static int amd_pstate_suspend(struct cpufreq_policy *policy)
>   {
>   	struct amd_cpudata *cpudata = policy->driver_data;
> +	union perf_cached perf = READ_ONCE(cpudata->perf);
> +
> +	/*
> +	 * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
> +	 * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
> +	 * the limits, epp and desired perf will get reset to the cached values in cpudata struct
> +	 */
> +	amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);

In EPP mode this appears it would be OK because the perf value should 
get reset in the resume for amd_pstate_epp_update_limit() but in passive 
mode won't this never get reset on resume from suspend?

>   
>   	/* invalidate to ensure it's rewritten during resume */
>   	cpudata->cppc_req_cached = 0;
> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
> index fbe1c08d3f06..2f7ae364d331 100644
> --- a/drivers/cpufreq/amd-pstate.h
> +++ b/drivers/cpufreq/amd-pstate.h
> @@ -30,6 +30,7 @@
>    * @lowest_perf: the absolute lowest performance level of the processor
>    * @min_limit_perf: Cached value of the performance corresponding to policy->min
>    * @max_limit_perf: Cached value of the performance corresponding to policy->max
> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>    */
>   union perf_cached {
>   	struct {
> @@ -39,6 +40,7 @@ union perf_cached {
>   		u8	lowest_perf;
>   		u8	min_limit_perf;
>   		u8	max_limit_perf;
> +		u8	bios_min_perf;
>   	};
>   	u64	val;
>   };
Re: [PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option
Posted by Dhananjay Ugwekar 7 months, 3 weeks ago

On 4/21/2025 10:23 PM, Mario Limonciello wrote:
> On 4/21/2025 3:04 AM, Dhananjay Ugwekar wrote:
>> Initialize lower frequency limit to the "Requested CPU Min frequency"
>> BIOS option (if it is set) value as part of the driver->init()
>> callback. The BIOS specified value is passed by the PMFW as min_perf in
>> CPPC_REQ MSR.
>>
>> To ensure that we don't mistake a stale min_perf value in CPPC_REQ
>> value as the "Requested CPU Min frequency" during a kexec wakeup, reset
>> the CPPC_REQ.min_perf value back to the BIOS specified one in the offline,
>> exit and suspend callbacks. amd_pstate_target() and
>> amd_pstate_epp_update_limit() which are invoked as part of the resume()
>> and online() callbacks will take care of restoring the CPPC_REQ back to
>> the latest sane values.
>>
>> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
>> ---
>> Changes in v2:
>> * Modify the condition in msr_init_perf to initialize perf.bios_min_perf
>>    to 0 by default
>> * Use READ_ONCE to read cpudata->perf in exit, suspend and offline
>>    callbacks
>> ---
>>   drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++++++++++-------
>>   drivers/cpufreq/amd-pstate.h |  2 ++
>>   2 files changed, 56 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>> index 02de51001eba..407fdd31fb0b 100644
>> --- a/drivers/cpufreq/amd-pstate.c
>> +++ b/drivers/cpufreq/amd-pstate.c
>> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>>   static int msr_init_perf(struct amd_cpudata *cpudata)
>>   {
>>       union perf_cached perf = READ_ONCE(cpudata->perf);
>> -    u64 cap1, numerator;
>> +    u64 cap1, numerator, cppc_req;
>> +    u8 min_perf;
>>         int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>>                        &cap1);
>> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>>       if (ret)
>>           return ret;
>>   +    ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
>> +    if (ret)
>> +        return ret;
>> +
>> +    WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
>> +    min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
>> +
>> +    /*
>> +     * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
>> +     * indication that the min_perf value is the one specified through the BIOS option
>> +     */
>> +    cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
>> +
>> +    if (!cppc_req)
>> +        perf.bios_min_perf = min_perf;
>> +
>>       perf.highest_perf = numerator;
>>       perf.max_limit_perf = numerator;
>>       perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
>> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>>   {
>>       /*
>>        * Initialize lower frequency limit (i.e.policy->min) with
>> -     * lowest_nonlinear_frequency which is the most energy efficient
>> -     * frequency. Override the initial value set by cpufreq core and
>> -     * amd-pstate qos_requests.
>> +     * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
>> +     * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>>        */
>>       if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>>           struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>>                             cpufreq_cpu_get(policy_data->cpu);
>>           struct amd_cpudata *cpudata;
>> +        union perf_cached perf;
>>             if (!policy)
>>               return -EINVAL;
>>             cpudata = policy->driver_data;
>> -        policy_data->min = cpudata->lowest_nonlinear_freq;
>> +        perf = READ_ONCE(cpudata->perf);
>> +
>> +        if (perf.bios_min_perf)
>> +            policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
>> +                            perf.bios_min_perf);
>> +        else
>> +            policy_data->min = cpudata->lowest_nonlinear_freq;
>>       }
>>         cpufreq_verify_within_cpu_limits(policy_data);
>> @@ -1040,6 +1063,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>>   static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>>   {
>>       struct amd_cpudata *cpudata = policy->driver_data;
>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> +    /* Reset CPPC_REQ MSR to the BIOS value */
>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>         freq_qos_remove_request(&cpudata->req[1]);
>>       freq_qos_remove_request(&cpudata->req[0]);
>> @@ -1428,7 +1455,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>       struct amd_cpudata *cpudata;
>>       union perf_cached perf;
>>       struct device *dev;
>> -    u64 value;
>>       int ret;
>>         /*
>> @@ -1493,12 +1519,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>           cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>>       }
>>   -    if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
>> -        ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>> -        if (ret)
>> -            return ret;
>> -        WRITE_ONCE(cpudata->cppc_req_cached, value);
>> -    }
>>       ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>>       if (ret)
>>           return ret;
>> @@ -1518,6 +1538,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>>       struct amd_cpudata *cpudata = policy->driver_data;
>>         if (cpudata) {
>> +        union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> +        /* Reset CPPC_REQ MSR to the BIOS value */
>> +        amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>> +
>>           kfree(cpudata);
>>           policy->driver_data = NULL;
>>       }
>> @@ -1575,12 +1600,28 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>>     static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>>   {
>> -    return 0;
>> +    struct amd_cpudata *cpudata = policy->driver_data;
>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> +    /*
>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> +     * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
>> +     * limits, epp and desired perf will get reset to the cached values in cpudata struct
>> +     */
>> +    return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>   }
>>     static int amd_pstate_suspend(struct cpufreq_policy *policy)
>>   {
>>       struct amd_cpudata *cpudata = policy->driver_data;
>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>> +
>> +    /*
>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>> +     * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
>> +     * the limits, epp and desired perf will get reset to the cached values in cpudata struct
>> +     */
>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
> 
> In EPP mode this appears it would be OK because the perf value should get reset in the resume for amd_pstate_epp_update_limit() but in passive mode won't this never get reset on resume from suspend?

In passive mode, on resume, amd_pstate_target gets invoked through the code flow mentioned below,

Cpufreq_resume()->cpufreq_start_governor->(cpufreq_driver->start()&&cpufreq_driver->limits())->amd_pstate_target() [this is for _target() based governors]
For schedutil, start_governor will register the update_util hook and it will be called at every util change, which eventually calls adjust_perf()

I tested these scenarios using "sudo rtcwake -m mem -s 10" (suspend to mem for 10 seconds) on a server system, within 1-2 mins of the wakeup the CPPC_REQ MSR 
values of all CPUs get updated to sane ones. It would be helpful if you could test for such scenarios on the client systems as well.

That said, there might be a small window between the resume and governor trigger, where the value in CPPC_REQ MSR would be invalid. Are we okay with that ?

> 
>>         /* invalidate to ensure it's rewritten during resume */
>>       cpudata->cppc_req_cached = 0;
>> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>> index fbe1c08d3f06..2f7ae364d331 100644
>> --- a/drivers/cpufreq/amd-pstate.h
>> +++ b/drivers/cpufreq/amd-pstate.h
>> @@ -30,6 +30,7 @@
>>    * @lowest_perf: the absolute lowest performance level of the processor
>>    * @min_limit_perf: Cached value of the performance corresponding to policy->min
>>    * @max_limit_perf: Cached value of the performance corresponding to policy->max
>> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>>    */
>>   union perf_cached {
>>       struct {
>> @@ -39,6 +40,7 @@ union perf_cached {
>>           u8    lowest_perf;
>>           u8    min_limit_perf;
>>           u8    max_limit_perf;
>> +        u8    bios_min_perf;
>>       };
>>       u64    val;
>>   };
> 

Re: [PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option
Posted by Mario Limonciello 7 months, 3 weeks ago
On 4/21/2025 11:02 PM, Dhananjay Ugwekar wrote:
> 
> 
> On 4/21/2025 10:23 PM, Mario Limonciello wrote:
>> On 4/21/2025 3:04 AM, Dhananjay Ugwekar wrote:
>>> Initialize lower frequency limit to the "Requested CPU Min frequency"
>>> BIOS option (if it is set) value as part of the driver->init()
>>> callback. The BIOS specified value is passed by the PMFW as min_perf in
>>> CPPC_REQ MSR.
>>>
>>> To ensure that we don't mistake a stale min_perf value in CPPC_REQ
>>> value as the "Requested CPU Min frequency" during a kexec wakeup, reset
>>> the CPPC_REQ.min_perf value back to the BIOS specified one in the offline,
>>> exit and suspend callbacks. amd_pstate_target() and
>>> amd_pstate_epp_update_limit() which are invoked as part of the resume()
>>> and online() callbacks will take care of restoring the CPPC_REQ back to
>>> the latest sane values.
>>>
>>> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
>>> ---
>>> Changes in v2:
>>> * Modify the condition in msr_init_perf to initialize perf.bios_min_perf
>>>     to 0 by default
>>> * Use READ_ONCE to read cpudata->perf in exit, suspend and offline
>>>     callbacks
>>> ---
>>>    drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++++++++++-------
>>>    drivers/cpufreq/amd-pstate.h |  2 ++
>>>    2 files changed, 56 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>>> index 02de51001eba..407fdd31fb0b 100644
>>> --- a/drivers/cpufreq/amd-pstate.c
>>> +++ b/drivers/cpufreq/amd-pstate.c
>>> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>>>    static int msr_init_perf(struct amd_cpudata *cpudata)
>>>    {
>>>        union perf_cached perf = READ_ONCE(cpudata->perf);
>>> -    u64 cap1, numerator;
>>> +    u64 cap1, numerator, cppc_req;
>>> +    u8 min_perf;
>>>          int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>>>                         &cap1);
>>> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>>>        if (ret)
>>>            return ret;
>>>    +    ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
>>> +    if (ret)
>>> +        return ret;
>>> +
>>> +    WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
>>> +    min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
>>> +
>>> +    /*
>>> +     * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
>>> +     * indication that the min_perf value is the one specified through the BIOS option
>>> +     */
>>> +    cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
>>> +
>>> +    if (!cppc_req)
>>> +        perf.bios_min_perf = min_perf;
>>> +
>>>        perf.highest_perf = numerator;
>>>        perf.max_limit_perf = numerator;
>>>        perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
>>> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>>>    {
>>>        /*
>>>         * Initialize lower frequency limit (i.e.policy->min) with
>>> -     * lowest_nonlinear_frequency which is the most energy efficient
>>> -     * frequency. Override the initial value set by cpufreq core and
>>> -     * amd-pstate qos_requests.
>>> +     * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
>>> +     * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>>>         */
>>>        if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>>>            struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>>>                              cpufreq_cpu_get(policy_data->cpu);
>>>            struct amd_cpudata *cpudata;
>>> +        union perf_cached perf;
>>>              if (!policy)
>>>                return -EINVAL;
>>>              cpudata = policy->driver_data;
>>> -        policy_data->min = cpudata->lowest_nonlinear_freq;
>>> +        perf = READ_ONCE(cpudata->perf);
>>> +
>>> +        if (perf.bios_min_perf)
>>> +            policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
>>> +                            perf.bios_min_perf);
>>> +        else
>>> +            policy_data->min = cpudata->lowest_nonlinear_freq;
>>>        }
>>>          cpufreq_verify_within_cpu_limits(policy_data);
>>> @@ -1040,6 +1063,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>>>    static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>>>    {
>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>> +
>>> +    /* Reset CPPC_REQ MSR to the BIOS value */
>>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>>          freq_qos_remove_request(&cpudata->req[1]);
>>>        freq_qos_remove_request(&cpudata->req[0]);
>>> @@ -1428,7 +1455,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>>        struct amd_cpudata *cpudata;
>>>        union perf_cached perf;
>>>        struct device *dev;
>>> -    u64 value;
>>>        int ret;
>>>          /*
>>> @@ -1493,12 +1519,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>>            cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>>>        }
>>>    -    if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
>>> -        ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>>> -        if (ret)
>>> -            return ret;
>>> -        WRITE_ONCE(cpudata->cppc_req_cached, value);
>>> -    }
>>>        ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>>>        if (ret)
>>>            return ret;
>>> @@ -1518,6 +1538,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>>          if (cpudata) {
>>> +        union perf_cached perf = READ_ONCE(cpudata->perf);
>>> +
>>> +        /* Reset CPPC_REQ MSR to the BIOS value */
>>> +        amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>> +
>>>            kfree(cpudata);
>>>            policy->driver_data = NULL;
>>>        }
>>> @@ -1575,12 +1600,28 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>>>      static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>>>    {
>>> -    return 0;
>>> +    struct amd_cpudata *cpudata = policy->driver_data;
>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>> +
>>> +    /*
>>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>>> +     * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
>>> +     * limits, epp and desired perf will get reset to the cached values in cpudata struct
>>> +     */
>>> +    return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>>    }
>>>      static int amd_pstate_suspend(struct cpufreq_policy *policy)
>>>    {
>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>> +
>>> +    /*
>>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>>> +     * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
>>> +     * the limits, epp and desired perf will get reset to the cached values in cpudata struct
>>> +     */
>>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>
>> In EPP mode this appears it would be OK because the perf value should get reset in the resume for amd_pstate_epp_update_limit() but in passive mode won't this never get reset on resume from suspend?
> 
> In passive mode, on resume, amd_pstate_target gets invoked through the code flow mentioned below,
> 
> Cpufreq_resume()->cpufreq_start_governor->(cpufreq_driver->start()&&cpufreq_driver->limits())->amd_pstate_target() [this is for _target() based governors]
> For schedutil, start_governor will register the update_util hook and it will be called at every util change, which eventually calls adjust_perf()
> 
> I tested these scenarios using "sudo rtcwake -m mem -s 10" (suspend to mem for 10 seconds) on a server system, within 1-2 mins of the wakeup the CPPC_REQ MSR
> values of all CPUs get updated to sane ones. It would be helpful if you could test for such scenarios on the client systems as well.
> 
> That said, there might be a small window between the resume and governor trigger, where the value in CPPC_REQ MSR would be invalid. Are we okay with that ?

For server systems it's probably not much of a big deal since servers 
aren't frequently suspended, bu this window of time seems untenable for 
a client machine.  As a user that would mean effectively they have wrong 
limits programmed after wakeup for 1-2 minutes and could potentially 
have performance gimped as a result.

I'd say let's just flush the right value immediately after resume and 
then the write 1-2 minutes later becomes a no-op.  With the checks we 
have in the driver now I expect that they don't even turn into MSR writes.

> 
>>
>>>          /* invalidate to ensure it's rewritten during resume */
>>>        cpudata->cppc_req_cached = 0;
>>> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>>> index fbe1c08d3f06..2f7ae364d331 100644
>>> --- a/drivers/cpufreq/amd-pstate.h
>>> +++ b/drivers/cpufreq/amd-pstate.h
>>> @@ -30,6 +30,7 @@
>>>     * @lowest_perf: the absolute lowest performance level of the processor
>>>     * @min_limit_perf: Cached value of the performance corresponding to policy->min
>>>     * @max_limit_perf: Cached value of the performance corresponding to policy->max
>>> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>>>     */
>>>    union perf_cached {
>>>        struct {
>>> @@ -39,6 +40,7 @@ union perf_cached {
>>>            u8    lowest_perf;
>>>            u8    min_limit_perf;
>>>            u8    max_limit_perf;
>>> +        u8    bios_min_perf;
>>>        };
>>>        u64    val;
>>>    };
>>
> 

Re: [PATCH v2 2/2] cpufreq/amd-pstate: Add support for the "Requested CPU Min frequency" BIOS option
Posted by Dhananjay Ugwekar 7 months, 3 weeks ago
On 4/23/2025 7:35 PM, Mario Limonciello wrote:
> On 4/21/2025 11:02 PM, Dhananjay Ugwekar wrote:
>>
>>
>> On 4/21/2025 10:23 PM, Mario Limonciello wrote:
>>> On 4/21/2025 3:04 AM, Dhananjay Ugwekar wrote:
>>>> Initialize lower frequency limit to the "Requested CPU Min frequency"
>>>> BIOS option (if it is set) value as part of the driver->init()
>>>> callback. The BIOS specified value is passed by the PMFW as min_perf in
>>>> CPPC_REQ MSR.
>>>>
>>>> To ensure that we don't mistake a stale min_perf value in CPPC_REQ
>>>> value as the "Requested CPU Min frequency" during a kexec wakeup, reset
>>>> the CPPC_REQ.min_perf value back to the BIOS specified one in the offline,
>>>> exit and suspend callbacks. amd_pstate_target() and
>>>> amd_pstate_epp_update_limit() which are invoked as part of the resume()
>>>> and online() callbacks will take care of restoring the CPPC_REQ back to
>>>> the latest sane values.
>>>>
>>>> Signed-off-by: Dhananjay Ugwekar <dhananjay.ugwekar@amd.com>
>>>> ---
>>>> Changes in v2:
>>>> * Modify the condition in msr_init_perf to initialize perf.bios_min_perf
>>>>     to 0 by default
>>>> * Use READ_ONCE to read cpudata->perf in exit, suspend and offline
>>>>     callbacks
>>>> ---
>>>>    drivers/cpufreq/amd-pstate.c | 67 +++++++++++++++++++++++++++++-------
>>>>    drivers/cpufreq/amd-pstate.h |  2 ++
>>>>    2 files changed, 56 insertions(+), 13 deletions(-)
>>>>
>>>> diff --git a/drivers/cpufreq/amd-pstate.c b/drivers/cpufreq/amd-pstate.c
>>>> index 02de51001eba..407fdd31fb0b 100644
>>>> --- a/drivers/cpufreq/amd-pstate.c
>>>> +++ b/drivers/cpufreq/amd-pstate.c
>>>> @@ -389,7 +389,8 @@ static inline int amd_pstate_cppc_enable(struct cpufreq_policy *policy)
>>>>    static int msr_init_perf(struct amd_cpudata *cpudata)
>>>>    {
>>>>        union perf_cached perf = READ_ONCE(cpudata->perf);
>>>> -    u64 cap1, numerator;
>>>> +    u64 cap1, numerator, cppc_req;
>>>> +    u8 min_perf;
>>>>          int ret = rdmsrl_safe_on_cpu(cpudata->cpu, MSR_AMD_CPPC_CAP1,
>>>>                         &cap1);
>>>> @@ -400,6 +401,22 @@ static int msr_init_perf(struct amd_cpudata *cpudata)
>>>>        if (ret)
>>>>            return ret;
>>>>    +    ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &cppc_req);
>>>> +    if (ret)
>>>> +        return ret;
>>>> +
>>>> +    WRITE_ONCE(cpudata->cppc_req_cached, cppc_req);
>>>> +    min_perf = FIELD_GET(AMD_CPPC_MIN_PERF_MASK, cppc_req);
>>>> +
>>>> +    /*
>>>> +     * Clear out the min_perf part to check if the rest of the MSR is 0, if yes, this is an
>>>> +     * indication that the min_perf value is the one specified through the BIOS option
>>>> +     */
>>>> +    cppc_req &= ~(AMD_CPPC_MIN_PERF_MASK);
>>>> +
>>>> +    if (!cppc_req)
>>>> +        perf.bios_min_perf = min_perf;
>>>> +
>>>>        perf.highest_perf = numerator;
>>>>        perf.max_limit_perf = numerator;
>>>>        perf.min_limit_perf = FIELD_GET(AMD_CPPC_LOWEST_PERF_MASK, cap1);
>>>> @@ -580,20 +597,26 @@ static int amd_pstate_verify(struct cpufreq_policy_data *policy_data)
>>>>    {
>>>>        /*
>>>>         * Initialize lower frequency limit (i.e.policy->min) with
>>>> -     * lowest_nonlinear_frequency which is the most energy efficient
>>>> -     * frequency. Override the initial value set by cpufreq core and
>>>> -     * amd-pstate qos_requests.
>>>> +     * lowest_nonlinear_frequency or the min frequency (if) specified in BIOS,
>>>> +     * Override the initial value set by cpufreq core and amd-pstate qos_requests.
>>>>         */
>>>>        if (policy_data->min == FREQ_QOS_MIN_DEFAULT_VALUE) {
>>>>            struct cpufreq_policy *policy __free(put_cpufreq_policy) =
>>>>                              cpufreq_cpu_get(policy_data->cpu);
>>>>            struct amd_cpudata *cpudata;
>>>> +        union perf_cached perf;
>>>>              if (!policy)
>>>>                return -EINVAL;
>>>>              cpudata = policy->driver_data;
>>>> -        policy_data->min = cpudata->lowest_nonlinear_freq;
>>>> +        perf = READ_ONCE(cpudata->perf);
>>>> +
>>>> +        if (perf.bios_min_perf)
>>>> +            policy_data->min = perf_to_freq(perf, cpudata->nominal_freq,
>>>> +                            perf.bios_min_perf);
>>>> +        else
>>>> +            policy_data->min = cpudata->lowest_nonlinear_freq;
>>>>        }
>>>>          cpufreq_verify_within_cpu_limits(policy_data);
>>>> @@ -1040,6 +1063,10 @@ static int amd_pstate_cpu_init(struct cpufreq_policy *policy)
>>>>    static void amd_pstate_cpu_exit(struct cpufreq_policy *policy)
>>>>    {
>>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>>> +
>>>> +    /* Reset CPPC_REQ MSR to the BIOS value */
>>>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>>>          freq_qos_remove_request(&cpudata->req[1]);
>>>>        freq_qos_remove_request(&cpudata->req[0]);
>>>> @@ -1428,7 +1455,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>>>        struct amd_cpudata *cpudata;
>>>>        union perf_cached perf;
>>>>        struct device *dev;
>>>> -    u64 value;
>>>>        int ret;
>>>>          /*
>>>> @@ -1493,12 +1519,6 @@ static int amd_pstate_epp_cpu_init(struct cpufreq_policy *policy)
>>>>            cpudata->epp_default = AMD_CPPC_EPP_BALANCE_PERFORMANCE;
>>>>        }
>>>>    -    if (cpu_feature_enabled(X86_FEATURE_CPPC)) {
>>>> -        ret = rdmsrl_on_cpu(cpudata->cpu, MSR_AMD_CPPC_REQ, &value);
>>>> -        if (ret)
>>>> -            return ret;
>>>> -        WRITE_ONCE(cpudata->cppc_req_cached, value);
>>>> -    }
>>>>        ret = amd_pstate_set_epp(policy, cpudata->epp_default);
>>>>        if (ret)
>>>>            return ret;
>>>> @@ -1518,6 +1538,11 @@ static void amd_pstate_epp_cpu_exit(struct cpufreq_policy *policy)
>>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>>>          if (cpudata) {
>>>> +        union perf_cached perf = READ_ONCE(cpudata->perf);
>>>> +
>>>> +        /* Reset CPPC_REQ MSR to the BIOS value */
>>>> +        amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>>> +
>>>>            kfree(cpudata);
>>>>            policy->driver_data = NULL;
>>>>        }
>>>> @@ -1575,12 +1600,28 @@ static int amd_pstate_cpu_online(struct cpufreq_policy *policy)
>>>>      static int amd_pstate_cpu_offline(struct cpufreq_policy *policy)
>>>>    {
>>>> -    return 0;
>>>> +    struct amd_cpudata *cpudata = policy->driver_data;
>>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>>> +
>>>> +    /*
>>>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>>>> +     * min_perf value across kexec reboots. If this CPU is just onlined normally after this, the
>>>> +     * limits, epp and desired perf will get reset to the cached values in cpudata struct
>>>> +     */
>>>> +    return amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>>>    }
>>>>      static int amd_pstate_suspend(struct cpufreq_policy *policy)
>>>>    {
>>>>        struct amd_cpudata *cpudata = policy->driver_data;
>>>> +    union perf_cached perf = READ_ONCE(cpudata->perf);
>>>> +
>>>> +    /*
>>>> +     * Reset CPPC_REQ MSR to the BIOS value, this will allow us to retain the BIOS specified
>>>> +     * min_perf value across kexec reboots. If this CPU is just resumed back without kexec,
>>>> +     * the limits, epp and desired perf will get reset to the cached values in cpudata struct
>>>> +     */
>>>> +    amd_pstate_update_perf(policy, perf.bios_min_perf, 0U, 0U, 0U, false);
>>>
>>> In EPP mode this appears it would be OK because the perf value should get reset in the resume for amd_pstate_epp_update_limit() but in passive mode won't this never get reset on resume from suspend?
>>
>> In passive mode, on resume, amd_pstate_target gets invoked through the code flow mentioned below,
>>
>> Cpufreq_resume()->cpufreq_start_governor->(cpufreq_driver->start()&&cpufreq_driver->limits())->amd_pstate_target() [this is for _target() based governors]
>> For schedutil, start_governor will register the update_util hook and it will be called at every util change, which eventually calls adjust_perf()
>>
>> I tested these scenarios using "sudo rtcwake -m mem -s 10" (suspend to mem for 10 seconds) on a server system, within 1-2 mins of the wakeup the CPPC_REQ MSR
>> values of all CPUs get updated to sane ones. It would be helpful if you could test for such scenarios on the client systems as well.
>>
>> That said, there might be a small window between the resume and governor trigger, where the value in CPPC_REQ MSR would be invalid. Are we okay with that ?
> 
> For server systems it's probably not much of a big deal since servers aren't frequently suspended, bu this window of time seems untenable for a client machine.  As a user that would mean effectively they have wrong limits programmed after wakeup for 1-2 minutes and could potentially have performance gimped as a result.
> 
> I'd say let's just flush the right value immediately after resume and then the write 1-2 minutes later becomes a no-op.  With the checks we have in the driver now I expect that they don't even turn into MSR writes.

Makes sense, I'll add this and post v3

> 
>>
>>>
>>>>          /* invalidate to ensure it's rewritten during resume */
>>>>        cpudata->cppc_req_cached = 0;
>>>> diff --git a/drivers/cpufreq/amd-pstate.h b/drivers/cpufreq/amd-pstate.h
>>>> index fbe1c08d3f06..2f7ae364d331 100644
>>>> --- a/drivers/cpufreq/amd-pstate.h
>>>> +++ b/drivers/cpufreq/amd-pstate.h
>>>> @@ -30,6 +30,7 @@
>>>>     * @lowest_perf: the absolute lowest performance level of the processor
>>>>     * @min_limit_perf: Cached value of the performance corresponding to policy->min
>>>>     * @max_limit_perf: Cached value of the performance corresponding to policy->max
>>>> + * @bios_min_perf: Cached perf value corresponding to the "Requested CPU Min Frequency" BIOS option
>>>>     */
>>>>    union perf_cached {
>>>>        struct {
>>>> @@ -39,6 +40,7 @@ union perf_cached {
>>>>            u8    lowest_perf;
>>>>            u8    min_limit_perf;
>>>>            u8    max_limit_perf;
>>>> +        u8    bios_min_perf;
>>>>        };
>>>>        u64    val;
>>>>    };
>>>
>>
>