Introduce helper set_amd_cppc_para() and get_amd_cppc_para() to
SET/GET CPPC-related para for amd-cppc/amd-cppc-epp driver.
In get_cpufreq_cppc()/set_cpufreq_cppc(), we use
"processor_pminfo[cpuid]->init & XEN_CPPC_INIT" to identify
cpufreq driver is amd-cppc/amd-cppc-epp. Furthermore, using whether
->setpolicy isn't null to tell whether amd-cppc in active mode.
Also, a new field "policy" has also been added in "struct xen_cppc_para" to
describe performance policy in active mode. It gets printed with other
cppc paras. A set of public macro XEN_CPUFREQ_POLICY_xxx is introduced
to be sync with internal ones CPUFREQ_POLICY_xxx.
A new policy XEN_CPUFREQ_POLICY_BALANCE is introduced, and could be achieved
only via xenpm XEN_SYSCTL_CPPC_SET_PRESET_BALANCE preset.
Signed-off-by: Penny Zheng <Penny.Zheng@amd.com>
---
v1 -> v2:
- Give the variable des_perf an initializer of 0
- Use the strncmp()s directly in the if()
---
v3 -> v4
- refactor comments
- remove double blank lines
- replace amd_cppc_in_use flag with XEN_PROCESSOR_PM_CPPC
---
- add new field "policy" in "struct xen_cppc_para"
- add new performamce policy XEN_CPUFREQ_POLICY_BALANCE
- drop string comparisons with "processor_pminfo[cpuid]->init & XEN_CPPC_INIT"
and "cpufreq.setpolicy == NULL"
- Blank line ahead of the main "return" of a function
- refactor comments, commit message and title
---
 tools/misc/xenpm.c                   |  10 +++
 xen/arch/x86/acpi/cpufreq/amd-cppc.c | 129 +++++++++++++++++++++++++++
 xen/drivers/acpi/pmstat.c            |  13 ++-
 xen/include/acpi/cpufreq/cpufreq.h   |  12 ++-
 xen/include/public/sysctl.h          |   5 ++
 5 files changed, 163 insertions(+), 6 deletions(-)
diff --git a/tools/misc/xenpm.c b/tools/misc/xenpm.c
index 2864506da4..2e5975cae4 100644
--- a/tools/misc/xenpm.c
+++ b/tools/misc/xenpm.c
@@ -38,6 +38,13 @@
 static xc_interface *xc_handle;
 static unsigned int max_cpu_nr;
 
+static const char cpufreq_policy_str[][12] = {
+    [XEN_CPUFREQ_POLICY_UNKNOWN] = "none",
+    [XEN_CPUFREQ_POLICY_POWERSAVE] = "powersave",
+    [XEN_CPUFREQ_POLICY_PERFORMANCE] = "performance",
+    [XEN_CPUFREQ_POLICY_BALANCE] = "balance",
+};
+
 /* help message */
 void show_help(void)
 {
@@ -984,6 +991,9 @@ static void print_cppc_para(unsigned int cpuid,
     printf("                     : desired [%"PRIu32"%s]\n",
            cppc->desired,
            cppc->desired ? "" : " hw autonomous");
+
+    printf("  performance policy : %s\n",
+           cpufreq_policy_str[cppc->policy]);
     printf("\n");
 }
 
diff --git a/xen/arch/x86/acpi/cpufreq/amd-cppc.c b/xen/arch/x86/acpi/cpufreq/amd-cppc.c
index 5f15e86dc4..490446018c 100644
--- a/xen/arch/x86/acpi/cpufreq/amd-cppc.c
+++ b/xen/arch/x86/acpi/cpufreq/amd-cppc.c
@@ -506,6 +506,135 @@ static int cf_check amd_cppc_epp_set_policy(struct cpufreq_policy *policy)
     return 0;
 }
 
+int get_amd_cppc_para(const struct cpufreq_policy *policy,
+                      struct xen_cppc_para *cppc_para)
+{
+    const struct amd_cppc_drv_data *data = per_cpu(amd_cppc_drv_data,
+                                                   policy->cpu);
+
+    if ( data == NULL )
+        return -ENODATA;
+
+    cppc_para->policy           = policy->policy;
+    cppc_para->lowest           = data->caps.lowest_perf;
+    cppc_para->lowest_nonlinear = data->caps.lowest_nonlinear_perf;
+    cppc_para->nominal          = data->caps.nominal_perf;
+    cppc_para->highest          = data->caps.highest_perf;
+    cppc_para->minimum          = data->req.min_perf;
+    cppc_para->maximum          = data->req.max_perf;
+    cppc_para->desired          = data->req.des_perf;
+    cppc_para->energy_perf      = data->req.epp;
+
+    return 0;
+}
+
+int set_amd_cppc_para(struct cpufreq_policy *policy,
+                      const struct xen_set_cppc_para *set_cppc)
+{
+    unsigned int cpu = policy->cpu;
+    struct amd_cppc_drv_data *data = per_cpu(amd_cppc_drv_data, cpu);
+    uint8_t max_perf, min_perf, des_perf = 0, epp;
+
+    if ( data == NULL )
+        return -ENOENT;
+
+    /* Validate all parameters */
+    if ( set_cppc->minimum > UINT8_MAX || set_cppc->maximum > UINT8_MAX ||
+         set_cppc->desired > UINT8_MAX || set_cppc->energy_perf > UINT8_MAX )
+        return -EINVAL;
+
+    /* Only allow values if params bit is set. */
+    if ( (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED) &&
+          set_cppc->desired) ||
+         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MINIMUM) &&
+          set_cppc->minimum) ||
+         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MAXIMUM) &&
+          set_cppc->maximum) ||
+         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ENERGY_PERF) &&
+          set_cppc->energy_perf) )
+        return -EINVAL;
+
+    /* Activity window not supported in MSR */
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW )
+        return -EOPNOTSUPP;
+
+    /* Return if there is nothing to do. */
+    if ( set_cppc->set_params == 0 )
+        return 0;
+
+    epp = per_cpu(epp_init, cpu);
+    /*
+     * Apply presets:
+     * XEN_SYSCTL_CPPC_SET_DESIRED reflects whether desired perf is set, which
+     * is also the flag to distinguish between passive mode and active mode.
+     * When it is set, CPPC in passive mode, only
+     * XEN_SYSCTL_CPPC_SET_PRESET_NONE could be chosen.
+     * when it is not set, CPPC in active mode, three different profile
+     * XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE/PERFORMANCE/BALANCE are provided,
+     */
+    switch ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_PRESET_MASK )
+    {
+    case XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE:
+        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
+            return -EINVAL;
+        policy->policy = CPUFREQ_POLICY_POWERSAVE;
+        min_perf = data->caps.lowest_perf;
+        /* Lower max frequency to lowest */
+        max_perf = data->caps.lowest_perf;
+        epp = CPPC_ENERGY_PERF_MAX_POWERSAVE;
+        break;
+
+    case XEN_SYSCTL_CPPC_SET_PRESET_PERFORMANCE:
+        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
+            return -EINVAL;
+        /* Increase idle frequency to highest */
+        policy->policy = CPUFREQ_POLICY_PERFORMANCE;
+        min_perf = data->caps.highest_perf;
+        max_perf = data->caps.highest_perf;
+        epp = CPPC_ENERGY_PERF_MAX_PERFORMANCE;
+        break;
+
+    case XEN_SYSCTL_CPPC_SET_PRESET_BALANCE:
+        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
+            return -EINVAL;
+        policy->policy = CPUFREQ_POLICY_BALANCE;
+        min_perf = data->caps.lowest_perf;
+        max_perf = data->caps.highest_perf;
+        epp = CPPC_ENERGY_PERF_BALANCE;
+        break;
+
+    case XEN_SYSCTL_CPPC_SET_PRESET_NONE:
+        /*
+         * In paasive mode, Xen governor is responsible for perfomance tuning.
+         * we shall set lowest_perf with "lowest_nonlinear_perf" to ensure
+         * governoring performance in P-states range.
+         */
+        min_perf = data->caps.lowest_nonlinear_perf;
+        max_perf = data->caps.highest_perf;
+        break;
+
+    default:
+        return -EINVAL;
+    }
+
+    /* Further customize presets if needed */
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MINIMUM )
+        min_perf = set_cppc->minimum;
+
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MAXIMUM )
+        max_perf = set_cppc->maximum;
+
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ENERGY_PERF )
+        epp = set_cppc->energy_perf;
+
+    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
+        des_perf = set_cppc->desired;
+
+    amd_cppc_write_request(cpu, min_perf, des_perf, max_perf, epp);
+
+    return 0;
+}
+
 static const struct cpufreq_driver __initconst_cf_clobber
 amd_cppc_cpufreq_driver =
 {
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index e5f375921a..9e1ed29a0a 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -334,6 +334,10 @@ static int get_cpufreq_cppc(struct xen_sysctl_pm_op *op)
     if ( hwp_active() )
         ret = get_hwp_para(op->cpuid, &op->u.cppc_para);
 
+    if ( processor_pminfo[op->cpuid]->init & XEN_CPPC_INIT )
+        ret = get_amd_cppc_para(per_cpu(cpufreq_cpu_policy, op->cpuid),
+                                &op->u.cppc_para);
+
     return ret;
 }
 
@@ -429,10 +433,13 @@ static int set_cpufreq_cppc(struct xen_sysctl_pm_op *op)
     if ( !policy || !policy->governor )
         return -ENOENT;
 
-    if ( !hwp_active() )
-        return -EOPNOTSUPP;
+    if ( hwp_active() )
+        return set_hwp_para(policy, &op->u.set_cppc);
+
+    if ( processor_pminfo[op->cpuid]->init & XEN_CPPC_INIT )
+        return set_amd_cppc_para(policy, &op->u.set_cppc);
 
-    return set_hwp_para(policy, &op->u.set_cppc);
+    return -EOPNOTSUPP;
 }
 
 int do_pm_op(struct xen_sysctl_pm_op *op)
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 6f31009e82..c542a6e633 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -134,14 +134,16 @@ extern int cpufreq_register_governor(struct cpufreq_governor *governor);
 extern struct cpufreq_governor *__find_governor(const char *governor);
 #define CPUFREQ_DEFAULT_GOVERNOR &cpufreq_gov_dbs
 
-#define CPUFREQ_POLICY_UNKNOWN      0
+#define CPUFREQ_POLICY_UNKNOWN      XEN_CPUFREQ_POLICY_UNKNOWN
 /*
  * If cpufreq_driver->target() exists, the ->governor decides what frequency
  * within the limits is used. If cpufreq_driver->setpolicy() exists, these
  * two generic policies are available:
  */
-#define CPUFREQ_POLICY_POWERSAVE    1
-#define CPUFREQ_POLICY_PERFORMANCE  2
+#define CPUFREQ_POLICY_POWERSAVE    XEN_CPUFREQ_POLICY_POWERSAVE
+#define CPUFREQ_POLICY_PERFORMANCE  XEN_CPUFREQ_POLICY_PERFORMANCE
+/* Achieved only via xenpm XEN_SYSCTL_CPPC_SET_PRESET_BALANCE preset */
+#define CPUFREQ_POLICY_BALANCE      XEN_CPUFREQ_POLICY_BALANCE
 
 unsigned int cpufreq_policy_from_governor(const struct cpufreq_governor *gov);
 
@@ -292,5 +294,9 @@ int acpi_cpufreq_register(void);
 
 int amd_cppc_cmdline_parse(const char *s, const char *e);
 int amd_cppc_register_driver(void);
+int get_amd_cppc_para(const struct cpufreq_policy *policy,
+                      struct xen_cppc_para *cppc_para);
+int set_amd_cppc_para(struct cpufreq_policy *policy,
+                      const struct xen_set_cppc_para *set_cppc);
 
 #endif /* __XEN_CPUFREQ_PM_H__ */
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index 29872fe508..18c38744ae 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -308,6 +308,11 @@ struct xen_ondemand {
 
 struct xen_cppc_para {
     /* OUT */
+#define XEN_CPUFREQ_POLICY_UNKNOWN      0
+#define XEN_CPUFREQ_POLICY_POWERSAVE    1
+#define XEN_CPUFREQ_POLICY_PERFORMANCE  2
+#define XEN_CPUFREQ_POLICY_BALANCE      4
+    uint32_t policy;
     /* activity_window supported if set */
 #define XEN_SYSCTL_CPPC_FEAT_ACT_WINDOW  (1 << 0)
     uint32_t features; /* bit flags for features */
-- 
2.34.1On 27.05.2025 10:48, Penny Zheng wrote:
> +int set_amd_cppc_para(struct cpufreq_policy *policy,
> +                      const struct xen_set_cppc_para *set_cppc)
> +{
> +    unsigned int cpu = policy->cpu;
> +    struct amd_cppc_drv_data *data = per_cpu(amd_cppc_drv_data, cpu);
> +    uint8_t max_perf, min_perf, des_perf = 0, epp;
> +
> +    if ( data == NULL )
> +        return -ENOENT;
> +
> +    /* Validate all parameters */
> +    if ( set_cppc->minimum > UINT8_MAX || set_cppc->maximum > UINT8_MAX ||
> +         set_cppc->desired > UINT8_MAX || set_cppc->energy_perf > UINT8_MAX )
> +        return -EINVAL;
> +
> +    /* Only allow values if params bit is set. */
> +    if ( (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED) &&
> +          set_cppc->desired) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MINIMUM) &&
> +          set_cppc->minimum) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MAXIMUM) &&
> +          set_cppc->maximum) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ENERGY_PERF) &&
> +          set_cppc->energy_perf) )
> +        return -EINVAL;
> +
> +    /* Activity window not supported in MSR */
> +    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW )
> +        return -EOPNOTSUPP;
> +
> +    /* Return if there is nothing to do. */
> +    if ( set_cppc->set_params == 0 )
> +        return 0;
> +
> +    epp = per_cpu(epp_init, cpu);
> +    /*
> +     * Apply presets:
> +     * XEN_SYSCTL_CPPC_SET_DESIRED reflects whether desired perf is set, which
> +     * is also the flag to distinguish between passive mode and active mode.
> +     * When it is set, CPPC in passive mode, only
> +     * XEN_SYSCTL_CPPC_SET_PRESET_NONE could be chosen.
> +     * when it is not set, CPPC in active mode, three different profile
> +     * XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE/PERFORMANCE/BALANCE are provided,
> +     */
> +    switch ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_PRESET_MASK )
> +    {
> +    case XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE:
> +        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
> +            return -EINVAL;
> +        policy->policy = CPUFREQ_POLICY_POWERSAVE;
> +        min_perf = data->caps.lowest_perf;
> +        /* Lower max frequency to lowest */
> +        max_perf = data->caps.lowest_perf;
> +        epp = CPPC_ENERGY_PERF_MAX_POWERSAVE;
> +        break;
> +
> +    case XEN_SYSCTL_CPPC_SET_PRESET_PERFORMANCE:
> +        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
> +            return -EINVAL;
> +        /* Increase idle frequency to highest */
> +        policy->policy = CPUFREQ_POLICY_PERFORMANCE;
> +        min_perf = data->caps.highest_perf;
> +        max_perf = data->caps.highest_perf;
> +        epp = CPPC_ENERGY_PERF_MAX_PERFORMANCE;
> +        break;
> +
> +    case XEN_SYSCTL_CPPC_SET_PRESET_BALANCE:
> +        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
> +            return -EINVAL;
> +        policy->policy = CPUFREQ_POLICY_BALANCE;
> +        min_perf = data->caps.lowest_perf;
> +        max_perf = data->caps.highest_perf;
> +        epp = CPPC_ENERGY_PERF_BALANCE;
> +        break;
> +
> +    case XEN_SYSCTL_CPPC_SET_PRESET_NONE:
> +        /*
> +         * In paasive mode, Xen governor is responsible for perfomance tuning.
> +         * we shall set lowest_perf with "lowest_nonlinear_perf" to ensure
> +         * governoring performance in P-states range.
> +         */
> +        min_perf = data->caps.lowest_nonlinear_perf;
> +        max_perf = data->caps.highest_perf;
> +        break;
Oh, also - can you really leave policy->policy unaltered here?
Jan
                
            On 27.05.2025 10:48, Penny Zheng wrote:
> --- a/tools/misc/xenpm.c
> +++ b/tools/misc/xenpm.c
> @@ -38,6 +38,13 @@
>  static xc_interface *xc_handle;
>  static unsigned int max_cpu_nr;
>  
> +static const char cpufreq_policy_str[][12] = {
> +    [XEN_CPUFREQ_POLICY_UNKNOWN] = "none",
Why not "unknown"?
> +    [XEN_CPUFREQ_POLICY_POWERSAVE] = "powersave",
> +    [XEN_CPUFREQ_POLICY_PERFORMANCE] = "performance",
> +    [XEN_CPUFREQ_POLICY_BALANCE] = "balance",
> +};
> +
>  /* help message */
>  void show_help(void)
>  {
> @@ -984,6 +991,9 @@ static void print_cppc_para(unsigned int cpuid,
>      printf("                     : desired [%"PRIu32"%s]\n",
>             cppc->desired,
>             cppc->desired ? "" : " hw autonomous");
> +
> +    printf("  performance policy : %s\n",
> +           cpufreq_policy_str[cppc->policy]);
What if for whatever reason the value you get is 4? Please avoid array overruns
also in user space tools.
> --- a/xen/arch/x86/acpi/cpufreq/amd-cppc.c
> +++ b/xen/arch/x86/acpi/cpufreq/amd-cppc.c
> @@ -506,6 +506,135 @@ static int cf_check amd_cppc_epp_set_policy(struct cpufreq_policy *policy)
>      return 0;
>  }
>  
> +int get_amd_cppc_para(const struct cpufreq_policy *policy,
> +                      struct xen_cppc_para *cppc_para)
> +{
> +    const struct amd_cppc_drv_data *data = per_cpu(amd_cppc_drv_data,
> +                                                   policy->cpu);
> +
> +    if ( data == NULL )
> +        return -ENODATA;
> +
> +    cppc_para->policy           = policy->policy;
> +    cppc_para->lowest           = data->caps.lowest_perf;
> +    cppc_para->lowest_nonlinear = data->caps.lowest_nonlinear_perf;
> +    cppc_para->nominal          = data->caps.nominal_perf;
> +    cppc_para->highest          = data->caps.highest_perf;
> +    cppc_para->minimum          = data->req.min_perf;
> +    cppc_para->maximum          = data->req.max_perf;
> +    cppc_para->desired          = data->req.des_perf;
> +    cppc_para->energy_perf      = data->req.epp;
> +
> +    return 0;
> +}
> +
> +int set_amd_cppc_para(struct cpufreq_policy *policy,
> +                      const struct xen_set_cppc_para *set_cppc)
> +{
> +    unsigned int cpu = policy->cpu;
> +    struct amd_cppc_drv_data *data = per_cpu(amd_cppc_drv_data, cpu);
> +    uint8_t max_perf, min_perf, des_perf = 0, epp;
> +
> +    if ( data == NULL )
> +        return -ENOENT;
> +
> +    /* Validate all parameters */
> +    if ( set_cppc->minimum > UINT8_MAX || set_cppc->maximum > UINT8_MAX ||
> +         set_cppc->desired > UINT8_MAX || set_cppc->energy_perf > UINT8_MAX )
> +        return -EINVAL;
> +
> +    /* Only allow values if params bit is set. */
> +    if ( (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED) &&
> +          set_cppc->desired) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MINIMUM) &&
> +          set_cppc->minimum) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_MAXIMUM) &&
> +          set_cppc->maximum) ||
> +         (!(set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ENERGY_PERF) &&
> +          set_cppc->energy_perf) )
> +        return -EINVAL;
If the respective flag is set, is the field being zero legitimate? In patch
10 you reject finding zero perf values.
> +    /* Activity window not supported in MSR */
> +    if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_ACT_WINDOW )
> +        return -EOPNOTSUPP;
> +
> +    /* Return if there is nothing to do. */
> +    if ( set_cppc->set_params == 0 )
> +        return 0;
> +
> +    epp = per_cpu(epp_init, cpu);
> +    /*
> +     * Apply presets:
> +     * XEN_SYSCTL_CPPC_SET_DESIRED reflects whether desired perf is set, which
> +     * is also the flag to distinguish between passive mode and active mode.
> +     * When it is set, CPPC in passive mode, only
> +     * XEN_SYSCTL_CPPC_SET_PRESET_NONE could be chosen.
> +     * when it is not set, CPPC in active mode, three different profile
> +     * XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE/PERFORMANCE/BALANCE are provided,
> +     */
> +    switch ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_PRESET_MASK )
> +    {
> +    case XEN_SYSCTL_CPPC_SET_PRESET_POWERSAVE:
> +        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
> +            return -EINVAL;
> +        policy->policy = CPUFREQ_POLICY_POWERSAVE;
> +        min_perf = data->caps.lowest_perf;
> +        /* Lower max frequency to lowest */
> +        max_perf = data->caps.lowest_perf;
> +        epp = CPPC_ENERGY_PERF_MAX_POWERSAVE;
> +        break;
> +
> +    case XEN_SYSCTL_CPPC_SET_PRESET_PERFORMANCE:
> +        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
> +            return -EINVAL;
> +        /* Increase idle frequency to highest */
> +        policy->policy = CPUFREQ_POLICY_PERFORMANCE;
> +        min_perf = data->caps.highest_perf;
> +        max_perf = data->caps.highest_perf;
> +        epp = CPPC_ENERGY_PERF_MAX_PERFORMANCE;
> +        break;
> +
> +    case XEN_SYSCTL_CPPC_SET_PRESET_BALANCE:
> +        if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED )
> +            return -EINVAL;
> +        policy->policy = CPUFREQ_POLICY_BALANCE;
> +        min_perf = data->caps.lowest_perf;
> +        max_perf = data->caps.highest_perf;
> +        epp = CPPC_ENERGY_PERF_BALANCE;
> +        break;
Isn't this more line "ondemand"? To me, "balance" would mean tying perf to at
least close around the middle of lowest and highest.
> +    case XEN_SYSCTL_CPPC_SET_PRESET_NONE:
> +        /*
> +         * In paasive mode, Xen governor is responsible for perfomance tuning.
Nit: passive
> +         * we shall set lowest_perf with "lowest_nonlinear_perf" to ensure
> +         * governoring performance in P-states range.
> +         */
> +        min_perf = data->caps.lowest_nonlinear_perf;
> +        max_perf = data->caps.highest_perf;
As in the earlier patch - I fear I don't understand the comment, and hence why
to use lowest-nonlinear here remains unclear to me.
> --- a/xen/drivers/acpi/pmstat.c
> +++ b/xen/drivers/acpi/pmstat.c
> @@ -334,6 +334,10 @@ static int get_cpufreq_cppc(struct xen_sysctl_pm_op *op)
>      if ( hwp_active() )
>          ret = get_hwp_para(op->cpuid, &op->u.cppc_para);
>  
> +    if ( processor_pminfo[op->cpuid]->init & XEN_CPPC_INIT )
> +        ret = get_amd_cppc_para(per_cpu(cpufreq_cpu_policy, op->cpuid),
> +                                &op->u.cppc_para);
This is a case where I think you would better use "else if". Otherwise it
looks as if both paths could be taken (and "ret" as well as op->u.cppc_para
be overwritten BY this second call).
> --- a/xen/include/acpi/cpufreq/cpufreq.h
> +++ b/xen/include/acpi/cpufreq/cpufreq.h
> @@ -134,14 +134,16 @@ extern int cpufreq_register_governor(struct cpufreq_governor *governor);
>  extern struct cpufreq_governor *__find_governor(const char *governor);
>  #define CPUFREQ_DEFAULT_GOVERNOR &cpufreq_gov_dbs
>  
> -#define CPUFREQ_POLICY_UNKNOWN      0
> +#define CPUFREQ_POLICY_UNKNOWN      XEN_CPUFREQ_POLICY_UNKNOWN
>  /*
>   * If cpufreq_driver->target() exists, the ->governor decides what frequency
>   * within the limits is used. If cpufreq_driver->setpolicy() exists, these
>   * two generic policies are available:
>   */
> -#define CPUFREQ_POLICY_POWERSAVE    1
> -#define CPUFREQ_POLICY_PERFORMANCE  2
> +#define CPUFREQ_POLICY_POWERSAVE    XEN_CPUFREQ_POLICY_POWERSAVE
> +#define CPUFREQ_POLICY_PERFORMANCE  XEN_CPUFREQ_POLICY_PERFORMANCE
> +/* Achieved only via xenpm XEN_SYSCTL_CPPC_SET_PRESET_BALANCE preset */
> +#define CPUFREQ_POLICY_BALANCE      XEN_CPUFREQ_POLICY_BALANCE
We don't need both sets of manifest constants, do we?
Jan
                
            [Public] > -----Original Message----- > From: Jan Beulich <jbeulich@suse.com> > Sent: Tuesday, June 17, 2025 6:38 PM > To: Penny, Zheng <penny.zheng@amd.com> > Cc: Huang, Ray <Ray.Huang@amd.com>; Anthony PERARD > <anthony.perard@vates.tech>; Andrew Cooper <andrew.cooper3@citrix.com>; > Orzel, Michal <Michal.Orzel@amd.com>; Julien Grall <julien@xen.org>; Roger Pau > Monné <roger.pau@citrix.com>; Stefano Stabellini <sstabellini@kernel.org>; xen- > devel@lists.xenproject.org > Subject: Re: [PATCH v5 18/18] xen/cpufreq: Adapt SET/GET_CPUFREQ_CPPC > xen_sysctl_pm_op for amd-cppc driver > > On 27.05.2025 10:48, Penny Zheng wrote: > > --- a/tools/misc/xenpm.c > > +++ b/tools/misc/xenpm.c > > + > > + case XEN_SYSCTL_CPPC_SET_PRESET_BALANCE: > > + if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED ) > > + return -EINVAL; > > + policy->policy = CPUFREQ_POLICY_BALANCE; > > + min_perf = data->caps.lowest_perf; > > + max_perf = data->caps.highest_perf; > > + epp = CPPC_ENERGY_PERF_BALANCE; > > + break; > > Isn't this more line "ondemand"? To me, "balance" would mean tying perf to at least > close around the middle of lowest and highest. > The "balance" word comes from the epp value, it is 127, which is the middle value In actual hardware algorithm, the value of Energy Performance Preference register(EPP) will be translated to frequency setting, And it sets the minimum active frequency. An EPP of zero sets the min active frequency to Fmax, while an EPP of 255 sets the min active frequency to Fmin (~IdleFreq). It is linear scaling, so epp of 127 is calculated to the middle of Fmax and Fmin. And Fmax corresponds to the highest perf, and Fmin corresponds to the non-linear lowest perf > > + case XEN_SYSCTL_CPPC_SET_PRESET_NONE: > > + /* > > + * In paasive mode, Xen governor is responsible for perfomance tuning. > > Nit: passive > > > + * we shall set lowest_perf with "lowest_nonlinear_perf" to ensure > > + * governoring performance in P-states range. > > + */ > > + min_perf = data->caps.lowest_nonlinear_perf; > > + max_perf = data->caps.highest_perf; > > As in the earlier patch - I fear I don't understand the comment, and hence why to > use lowest-nonlinear here remains unclear to me. > After previous discussion, I'll use non-linear lowest in all places > Jan
On 08.07.2025 06:02, Penny, Zheng wrote: >> -----Original Message----- >> From: Jan Beulich <jbeulich@suse.com> >> Sent: Tuesday, June 17, 2025 6:38 PM >> >> On 27.05.2025 10:48, Penny Zheng wrote: >>> --- a/tools/misc/xenpm.c >>> +++ b/tools/misc/xenpm.c >>> + >>> + case XEN_SYSCTL_CPPC_SET_PRESET_BALANCE: >>> + if ( set_cppc->set_params & XEN_SYSCTL_CPPC_SET_DESIRED ) >>> + return -EINVAL; >>> + policy->policy = CPUFREQ_POLICY_BALANCE; >>> + min_perf = data->caps.lowest_perf; >>> + max_perf = data->caps.highest_perf; >>> + epp = CPPC_ENERGY_PERF_BALANCE; >>> + break; >> >> Isn't this more line "ondemand"? To me, "balance" would mean tying perf to at least >> close around the middle of lowest and highest. > > The "balance" word comes from the epp value, it is 127, which is the middle value > In actual hardware algorithm, the value of Energy Performance Preference register(EPP) will be translated to frequency setting, > And it sets the minimum active frequency. > An EPP of zero sets the min active frequency to Fmax, while an EPP of 255 sets the min active frequency to Fmin (~IdleFreq). It is linear scaling, so epp of 127 is calculated to the middle of Fmax and Fmin. > And Fmax corresponds to the highest perf, and Fmin corresponds to the non-linear lowest perf I'm fine with the EPP value picked here. My question - as to the min_perf and max_perf values that you set - remains, though. Jan
© 2016 - 2025 Red Hat, Inc.