In order to provide backward compatibility with existing governors
that represent performance as frequencies, like ondemand, the _CPC
table can optionally provide processor frequency range values, Lowest
frequency and Norminal frequency, to let OS use Lowest Frequency/
Performance and Nominal Frequency/Performance as anchor points to
create linear mapping of CPPC abstract performance to CPU frequency.
As Xen is uncapable of parsing the ACPI dynamic table, this commit
introduces a new sub-hypercall to propagate required CPPC data from
dom0 kernel.
If the platform supports CPPC, the _CPC object must exist under all
processor objects. That is, Xen is not expected to support mixed mode
(CPPC & legacy PSS, _PCT, _PPC) operation, either advanced CPPC, or legacy
P-states.
This commit also introduces a new flag XEN_PM_CPPC to reflect processor
initialised in CPPC mode.
Signed-off-by: Penny Zheng <Penny.Zheng@amd.com>
---
v1 -> v2:
- Remove unnecessary figure braces
- Pointer-to-const for print_CPPC and set_cppc_pminfo
- Structure allocation shall use xvzalloc()
- Unnecessary memcpy(), and change it to a (type safe) structure assignment
- Add comment for struct xen_processor_cppc, and keep the chosen fields
in the order _CPC has them
- Obey to alphabetic sorting, and prefix compat structures with ? instead
of !
---
v2 -> v3:
- Trim too long line
- Re-place set_cppc_pminfo() past set_px_pminfo()
- Fix Misra violations: Declaration and definition ought to agree
in parameter names
- Introduce a new flag XEN_PM_CPPC to reflect processor initialised in CPPC
mode
---
xen/arch/x86/platform_hypercall.c | 5 +++
xen/arch/x86/x86_64/cpufreq.c | 2 +
xen/drivers/acpi/pmstat.c | 4 +-
xen/drivers/cpufreq/cpufreq.c | 53 +++++++++++++++++++++--
xen/include/acpi/cpufreq/processor_perf.h | 8 ++--
xen/include/public/platform.h | 16 +++++++
xen/include/xen/pmstat.h | 2 +
xen/include/xlat.lst | 1 +
8 files changed, 82 insertions(+), 9 deletions(-)
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index b0d98b5840..77390a0dbd 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -583,6 +583,11 @@ ret_t do_platform_op(
&op->u.set_pminfo.u.domain_info);
break;
+ case XEN_PM_CPPC:
+ ret = set_cppc_pminfo(op->u.set_pminfo.id,
+ &op->u.set_pminfo.u.cppc_data);
+ break;
+
default:
ret = -EINVAL;
break;
diff --git a/xen/arch/x86/x86_64/cpufreq.c b/xen/arch/x86/x86_64/cpufreq.c
index d1b93b8eef..565e4f8652 100644
--- a/xen/arch/x86/x86_64/cpufreq.c
+++ b/xen/arch/x86/x86_64/cpufreq.c
@@ -26,6 +26,8 @@
#include <xen/pmstat.h>
#include <compat/platform.h>
+CHECK_processor_cppc;
+
CHECK_processor_px;
CHECK_psd_package;
diff --git a/xen/drivers/acpi/pmstat.c b/xen/drivers/acpi/pmstat.c
index df309e27b4..c8e00766a6 100644
--- a/xen/drivers/acpi/pmstat.c
+++ b/xen/drivers/acpi/pmstat.c
@@ -68,7 +68,7 @@ int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
return -ENODEV;
if ( hwp_active() )
return -EOPNOTSUPP;
- if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
+ if ( !pmpt || !(pmpt->init & XEN_PX_INIT) )
return -EINVAL;
break;
default:
@@ -467,7 +467,7 @@ int do_pm_op(struct xen_sysctl_pm_op *op)
case CPUFREQ_PARA:
if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
return -ENODEV;
- if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
+ if ( !pmpt || !(pmpt->init & (XEN_PX_INIT | XEN_CPPC_INIT)) )
return -EINVAL;
break;
}
diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index 638476ca15..894bafebaa 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -210,7 +210,7 @@ int cpufreq_add_cpu(unsigned int cpu)
pmpt = processor_pminfo[cpu];
- if ( !(pmpt->perf.init & XEN_PX_INIT) )
+ if ( !(pmpt->init & (XEN_PX_INIT | XEN_CPPC_INIT)) )
return -EINVAL;
if (!cpufreq_driver.init)
@@ -368,7 +368,7 @@ int cpufreq_del_cpu(unsigned int cpu)
pmpt = processor_pminfo[cpu];
- if ( !(pmpt->perf.init & XEN_PX_INIT) )
+ if ( !(pmpt->init & (XEN_PX_INIT | XEN_CPPC_INIT)) )
return -EINVAL;
if (!per_cpu(cpufreq_cpu_policy, cpu))
@@ -459,6 +459,16 @@ static void print_PPC(unsigned int platform_limit)
printk("\t_PPC: %d\n", platform_limit);
}
+static void print_CPPC(const struct xen_processor_cppc *cppc_data)
+{
+ printk("\t_CPC: highest_perf=%u, lowest_perf=%u, "
+ "nominal_perf=%u, lowest_nonlinear_perf=%u, "
+ "nominal_mhz=%uMHz, lowest_mhz=%uMHz\n",
+ cppc_data->highest_perf, cppc_data->lowest_perf,
+ cppc_data->nominal_perf, cppc_data->lowest_nonlinear_perf,
+ cppc_data->nominal_mhz, cppc_data->lowest_mhz);
+}
+
int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *perf)
{
int ret = 0, cpu;
@@ -539,7 +549,7 @@ int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *perf)
if ( cpufreq_verbose )
print_PPC(pxpt->platform_limit);
- if ( pxpt->init == XEN_PX_INIT )
+ if ( pmpt->init == XEN_PX_INIT )
{
ret = cpufreq_limit_change(cpu);
goto out;
@@ -548,7 +558,7 @@ int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *perf)
if ( perf->flags == ( XEN_PX_PCT | XEN_PX_PSS | XEN_PX_PPC ) )
{
- pxpt->init = XEN_PX_INIT;
+ pmpt->init = XEN_PX_INIT;
ret = cpufreq_cpu_init(cpu);
goto out;
@@ -606,6 +616,41 @@ int set_psd_pminfo(uint32_t acpi_id, uint32_t shared_type,
return ret;
}
+int set_cppc_pminfo(uint32_t acpi_id,
+ const struct xen_processor_cppc *cppc_data)
+{
+ int ret = 0, cpuid;
+ struct processor_pminfo *pm_info;
+
+ cpuid = get_cpu_id(acpi_id);
+ if ( cpuid < 0 || !cppc_data )
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ( cpufreq_verbose )
+ printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
+ acpi_id, cpuid);
+
+ pm_info = processor_pminfo[cpuid];
+ /* Must already allocated in set_psd_pminfo */
+ if ( !pm_info )
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+ pm_info->cppc_data = *cppc_data;
+
+ if ( cpufreq_verbose )
+ print_CPPC(&pm_info->cppc_data);
+
+ pm_info->init = XEN_CPPC_INIT;
+ ret = cpufreq_cpu_init(cpuid);
+
+ out:
+ return ret;
+}
+
static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy)
{
if (usr_max_freq)
diff --git a/xen/include/acpi/cpufreq/processor_perf.h b/xen/include/acpi/cpufreq/processor_perf.h
index 19f5de6b08..12b6e6b826 100644
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -5,7 +5,8 @@
#include <public/sysctl.h>
#include <xen/acpi.h>
-#define XEN_PX_INIT 0x80000000U
+#define XEN_CPPC_INIT 0x40000000U
+#define XEN_PX_INIT 0x80000000U
unsigned int powernow_register_driver(void);
unsigned int get_measured_perf(unsigned int cpu, unsigned int flag);
@@ -27,8 +28,6 @@ struct processor_performance {
struct xen_pct_register status_register;
uint32_t state_count;
struct xen_processor_px *states;
-
- uint32_t init;
};
struct processor_pminfo {
@@ -37,6 +36,9 @@ struct processor_pminfo {
struct xen_psd_package domain_info;
uint32_t shared_type;
struct processor_performance perf;
+ struct xen_processor_cppc cppc_data;
+
+ uint32_t init;
};
extern struct processor_pminfo *processor_pminfo[NR_CPUS];
diff --git a/xen/include/public/platform.h b/xen/include/public/platform.h
index f5c50380cb..07f4b72014 100644
--- a/xen/include/public/platform.h
+++ b/xen/include/public/platform.h
@@ -364,6 +364,7 @@ DEFINE_XEN_GUEST_HANDLE(xenpf_getidletime_t);
#define XEN_PM_TX 2
#define XEN_PM_PDC 3
#define XEN_PM_PSD 4
+#define XEN_PM_CPPC 5
/* Px sub info type */
#define XEN_PX_PCT 1
@@ -432,6 +433,20 @@ struct xen_processor_px {
typedef struct xen_processor_px xen_processor_px_t;
DEFINE_XEN_GUEST_HANDLE(xen_processor_px_t);
+/*
+ * Subset _CPC fields useful for CPPC-compatible cpufreq
+ * driver's initialization
+ */
+struct xen_processor_cppc {
+ uint32_t highest_perf;
+ uint32_t nominal_perf;
+ uint32_t lowest_nonlinear_perf;
+ uint32_t lowest_perf;
+ uint32_t lowest_mhz;
+ uint32_t nominal_mhz;
+};
+typedef struct xen_processor_cppc xen_processor_cppc_t;
+
struct xen_psd_package {
uint64_t num_entries;
uint64_t revision;
@@ -461,6 +476,7 @@ struct xenpf_set_processor_pminfo {
xen_psd_package_t domain_info; /* _PSD */
struct xen_processor_performance perf; /* Px: _PPC/_PCT/_PSS/ */
XEN_GUEST_HANDLE(uint32) pdc; /* _PDC */
+ xen_processor_cppc_t cppc_data; /*_CPC */
} u;
/* Coordination type of this processor */
#define XEN_CPUPERF_SHARED_TYPE_HW 1 /* HW does needed coordination */
diff --git a/xen/include/xen/pmstat.h b/xen/include/xen/pmstat.h
index fd02316ce9..c223f417fd 100644
--- a/xen/include/xen/pmstat.h
+++ b/xen/include/xen/pmstat.h
@@ -8,6 +8,8 @@
int set_psd_pminfo(uint32_t acpi_id, uint32_t shared_type,
const struct xen_psd_package *psd_data);
int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *perf);
+int set_cppc_pminfo(uint32_t acpi_id,
+ const struct xen_processor_cppc *cppc_data);
long set_cx_pminfo(uint32_t acpi_id, struct xen_processor_power *power);
#ifdef CONFIG_COMPAT
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 0d964fe0ce..3f47552a22 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -162,6 +162,7 @@
! pct_register platform.h
! power_register platform.h
+? processor_cppc platform.h
? processor_csd platform.h
! processor_cx platform.h
! processor_flags platform.h
--
2.34.1
On 06.03.2025 09:39, Penny Zheng wrote:
> In order to provide backward compatibility with existing governors
> that represent performance as frequencies, like ondemand, the _CPC
> table can optionally provide processor frequency range values, Lowest
> frequency and Norminal frequency, to let OS use Lowest Frequency/
> Performance and Nominal Frequency/Performance as anchor points to
> create linear mapping of CPPC abstract performance to CPU frequency.
>
> As Xen is uncapable of parsing the ACPI dynamic table, this commit
> introduces a new sub-hypercall to propagate required CPPC data from
> dom0 kernel.
Nit: Here and ...
> If the platform supports CPPC, the _CPC object must exist under all
> processor objects. That is, Xen is not expected to support mixed mode
> (CPPC & legacy PSS, _PCT, _PPC) operation, either advanced CPPC, or legacy
> P-states.
>
> This commit also introduces a new flag XEN_PM_CPPC to reflect processor
> initialised in CPPC mode.
... here and elsewhere: Please avoid "this commit", "this patch", or
anything alike in patch descriptions.
Apart from this I'm not sure how useful review here is going to be, as there
apparently is a dependency on the problematic aspect in patch 1. Therefore
I'll give only a few independent comments.
> @@ -606,6 +616,41 @@ int set_psd_pminfo(uint32_t acpi_id, uint32_t shared_type,
> return ret;
> }
>
> +int set_cppc_pminfo(uint32_t acpi_id,
> + const struct xen_processor_cppc *cppc_data)
> +{
> + int ret = 0, cpuid;
> + struct processor_pminfo *pm_info;
> +
> + cpuid = get_cpu_id(acpi_id);
> + if ( cpuid < 0 || !cppc_data )
> + {
> + ret = -EINVAL;
> + goto out;
> + }
> + if ( cpufreq_verbose )
> + printk("Set CPU acpi_id(%d) cpuid(%d) CPPC State info:\n",
> + acpi_id, cpuid);
Nit: %d isn't appropriate for a variable/parameter of type uint32_t. In turn I
don't think the parameter needs to be of a fixed-width type; unsigned int will
be quite fine there, I expect. See ./CODING_STYLE.
> + pm_info = processor_pminfo[cpuid];
> + /* Must already allocated in set_psd_pminfo */
> + if ( !pm_info )
> + {
> + ret = -EINVAL;
> + goto out;
> + }
> + pm_info->cppc_data = *cppc_data;
> +
> + if ( cpufreq_verbose )
> + print_CPPC(&pm_info->cppc_data);
> +
> + pm_info->init = XEN_CPPC_INIT;
That is - whichever Dom0 invoked last will have data recorded, and the other
effectively is discarded? I think a warning (perhaps a one-time one) is minimally
needed to diagnose the case where one type of data replaces the other.
With this it also remains unclear to me how fallback to the legacy driver is
intended to be working. Both taken together are a strong suggestion that important
information on the model that is being implemented is missing from the description.
> @@ -27,8 +28,6 @@ struct processor_performance {
> struct xen_pct_register status_register;
> uint32_t state_count;
> struct xen_processor_px *states;
> -
> - uint32_t init;
> };
>
> struct processor_pminfo {
> @@ -37,6 +36,9 @@ struct processor_pminfo {
> struct xen_psd_package domain_info;
> uint32_t shared_type;
> struct processor_performance perf;
> + struct xen_processor_cppc cppc_data;
> +
> + uint32_t init;
> };
This moving of the "init" field and the mechanical changes coming with it
can likely be split out to a separate patch? Provided of course the movement
is still wanted/needed with patch 1 re-worked or dropped.
Jan
[Public]
Hi,
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Monday, March 24, 2025 10:28 PM
> To: Penny, Zheng <penny.zheng@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Andrew Cooper
> <andrew.cooper3@citrix.com>; Roger Pau Monné <roger.pau@citrix.com>;
> Anthony PERARD <anthony.perard@vates.tech>; Orzel, Michal
> <Michal.Orzel@amd.com>; Julien Grall <julien@xen.org>; Stefano Stabellini
> <sstabellini@kernel.org>; xen-devel@lists.xenproject.org
> Subject: Re: [PATCH v3 02/15] xen/x86: introduce new sub-hypercall to propagate
> CPPC data
>
> On 06.03.2025 09:39, Penny Zheng wrote:
> > + pm_info = processor_pminfo[cpuid];
> > + /* Must already allocated in set_psd_pminfo */
> > + if ( !pm_info )
> > + {
> > + ret = -EINVAL;
> > + goto out;
> > + }
> > + pm_info->cppc_data = *cppc_data;
> > +
> > + if ( cpufreq_verbose )
> > + print_CPPC(&pm_info->cppc_data);
> > +
> > + pm_info->init = XEN_CPPC_INIT;
>
> That is - whichever Dom0 invoked last will have data recorded, and the other
> effectively is discarded? I think a warning (perhaps a one-time one) is minimally
> needed to diagnose the case where one type of data replaces the other.
>
In last v2 discussion, we are discussing that either set_px_pminfo or set_cppc_pminfo shall be invoked,
which means either PX data is recorded, or CPPC data is recorded.
Current logic is that, cpufreq cmdline logic will set the XEN_PROCESSOR_PM_PX/CPPC
flag to reflect user preference, if user defines the fallback option, like "cpufreq=amd-cppc,xen", we will have both
XEN_PROCESSOR_PM_PX | XEN_PROCESSOR_PM_CPPC set in the beginning.
Later in cpufreq driver register logic, as only one register could be registered , if amd-cppc
being registered successfully, it will clear the XEN_PROCESSOR_PM_PX flag bit.
But if it fails to register, fallback scheme kicks off, we will try the legacy P-states, in the mean time,
clearing the XEN_PROCESSOR_PM_CPPC.
We are trying to make XEN_PROCESSOR_PM_PX and XEN_PROCESSOR_PM_CPPC exclusive
values after driver registration, which will ensure us that either set_px_pminfo or set_cppc_pminfo
is taken in the runtime.
> With this it also remains unclear to me how fallback to the legacy driver is intended
> to be working. Both taken together are a strong suggestion that important
> information on the model that is being implemented is missing from the description.
>
> > @@ -27,8 +28,6 @@ struct processor_performance {
> > struct xen_pct_register status_register;
> > uint32_t state_count;
> > struct xen_processor_px *states;
> > -
> > - uint32_t init;
> > };
> >
> > struct processor_pminfo {
> > @@ -37,6 +36,9 @@ struct processor_pminfo {
> > struct xen_psd_package domain_info;
> > uint32_t shared_type;
> > struct processor_performance perf;
> > + struct xen_processor_cppc cppc_data;
> > +
> > + uint32_t init;
> > };
>
> This moving of the "init" field and the mechanical changes coming with it can likely
> be split out to a separate patch? Provided of course the movement is still
> wanted/needed with patch 1 re-worked or dropped.
>
> Jan
On 25.03.2025 05:12, Penny, Zheng wrote:
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Monday, March 24, 2025 10:28 PM
>>
>> On 06.03.2025 09:39, Penny Zheng wrote:
>>> + pm_info = processor_pminfo[cpuid];
>>> + /* Must already allocated in set_psd_pminfo */
>>> + if ( !pm_info )
>>> + {
>>> + ret = -EINVAL;
>>> + goto out;
>>> + }
>>> + pm_info->cppc_data = *cppc_data;
>>> +
>>> + if ( cpufreq_verbose )
>>> + print_CPPC(&pm_info->cppc_data);
>>> +
>>> + pm_info->init = XEN_CPPC_INIT;
>>
>> That is - whichever Dom0 invoked last will have data recorded, and the other
>> effectively is discarded? I think a warning (perhaps a one-time one) is minimally
>> needed to diagnose the case where one type of data replaces the other.
>>
>
> In last v2 discussion, we are discussing that either set_px_pminfo or set_cppc_pminfo shall be invoked,
> which means either PX data is recorded, or CPPC data is recorded.
> Current logic is that, cpufreq cmdline logic will set the XEN_PROCESSOR_PM_PX/CPPC
> flag to reflect user preference, if user defines the fallback option, like "cpufreq=amd-cppc,xen", we will have both
> XEN_PROCESSOR_PM_PX | XEN_PROCESSOR_PM_CPPC set in the beginning.
> Later in cpufreq driver register logic, as only one register could be registered , if amd-cppc
> being registered successfully, it will clear the XEN_PROCESSOR_PM_PX flag bit.
> But if it fails to register, fallback scheme kicks off, we will try the legacy P-states, in the mean time,
> clearing the XEN_PROCESSOR_PM_CPPC.
> We are trying to make XEN_PROCESSOR_PM_PX and XEN_PROCESSOR_PM_CPPC exclusive
> values after driver registration, which will ensure us that either set_px_pminfo or set_cppc_pminfo
> is taken in the runtime.
Yet you realize that this implies Dom0 to know what configuration Xen uses,
in order to know which data to upload. The best approach might be to have
Dom0 upload all data it has, with us merely ignoring what we can't make use
of. The order of uploading (CPPC first or CPPC last) shouldn't matter. Then
(and only then, and - ftaod - only when uploading of the "wrong" kind of
data doesn't result in an error) things can go without warning.
Jan
[Public]
Hi,
> -----Original Message-----
> From: Jan Beulich <jbeulich@suse.com>
> Sent: Tuesday, March 25, 2025 3:54 PM
> To: Penny, Zheng <penny.zheng@amd.com>
> Cc: Huang, Ray <Ray.Huang@amd.com>; Andrew Cooper
> <andrew.cooper3@citrix.com>; Roger Pau Monné <roger.pau@citrix.com>;
> Anthony PERARD <anthony.perard@vates.tech>; Orzel, Michal
> <Michal.Orzel@amd.com>; Julien Grall <julien@xen.org>; Stefano Stabellini
> <sstabellini@kernel.org>; xen-devel@lists.xenproject.org
> Subject: Re: [PATCH v3 02/15] xen/x86: introduce new sub-hypercall to propagate
> CPPC data
>
> On 25.03.2025 05:12, Penny, Zheng wrote:
> >> -----Original Message-----
> >> From: Jan Beulich <jbeulich@suse.com>
> >> Sent: Monday, March 24, 2025 10:28 PM
> >>
> >> On 06.03.2025 09:39, Penny Zheng wrote:
> >>> + pm_info = processor_pminfo[cpuid];
> >>> + /* Must already allocated in set_psd_pminfo */
> >>> + if ( !pm_info )
> >>> + {
> >>> + ret = -EINVAL;
> >>> + goto out;
> >>> + }
> >>> + pm_info->cppc_data = *cppc_data;
> >>> +
> >>> + if ( cpufreq_verbose )
> >>> + print_CPPC(&pm_info->cppc_data);
> >>> +
> >>> + pm_info->init = XEN_CPPC_INIT;
> >>
> >> That is - whichever Dom0 invoked last will have data recorded, and
> >> the other effectively is discarded? I think a warning (perhaps a
> >> one-time one) is minimally needed to diagnose the case where one type of
> data replaces the other.
> >>
> >
> > In last v2 discussion, we are discussing that either set_px_pminfo or
> > set_cppc_pminfo shall be invoked, which means either PX data is recorded, or
> CPPC data is recorded.
> > Current logic is that, cpufreq cmdline logic will set the
> > XEN_PROCESSOR_PM_PX/CPPC flag to reflect user preference, if user
> > defines the fallback option, like "cpufreq=amd-cppc,xen", we will have both
> XEN_PROCESSOR_PM_PX | XEN_PROCESSOR_PM_CPPC set in the
> beginning.
> > Later in cpufreq driver register logic, as only one register could be
> > registered , if amd-cppc being registered successfully, it will clear the
> XEN_PROCESSOR_PM_PX flag bit.
> > But if it fails to register, fallback scheme kicks off, we will try
> > the legacy P-states, in the mean time, clearing the
> XEN_PROCESSOR_PM_CPPC.
> > We are trying to make XEN_PROCESSOR_PM_PX and
> XEN_PROCESSOR_PM_CPPC
> > exclusive values after driver registration, which will ensure us that
> > either set_px_pminfo or set_cppc_pminfo is taken in the runtime.
>
> Yet you realize that this implies Dom0 to know what configuration Xen uses, in
> order to know which data to upload. The best approach might be to have
> Dom0 upload all data it has, with us merely ignoring what we can't make use of.
PLZ correct me if I understand you wrongly:
Right now, I was letting DOM0 upload all data it has, and in the Xen:
```
case XEN_PM_CPPC:
if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CPPC) )
{
ret = -EOPNOTSUPPED;
break;
}
ret = set_cppc_pminfo(op->u.set_pminfo.id,
&op->u.set_pminfo.u.cppc_data);
break;
case XEN_PM_PX:
if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
{
ret = -EOPNOTSUPPED;
break;
}
ret = set_px_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.u.perf);
break;
```
I relied on flag XEN_PROCESSOR_PM_CPPC and XEN_PROCESSOR_PM_PX to choose which
info we shall record.
Firstly, we shall not return -EOPNOTSUPPED error above there.
> The order of uploading (CPPC first or CPPC last) shouldn't matter. Then (and only
> then, and - ftaod - only when uploading of the "wrong" kind of data doesn't result in
> an error) things can go without warning.
Then in
```
pm_info->init = XEN_CPPC_INIT;
ret = cpufreq_cpu_init(cpuid);
```
We shall add warning here to clarify no fallback scheme to replace now, when ret is not zero.
>
> Jan
On 28.03.2025 09:27, Penny, Zheng wrote:
> [Public]
>
> Hi,
>
>> -----Original Message-----
>> From: Jan Beulich <jbeulich@suse.com>
>> Sent: Tuesday, March 25, 2025 3:54 PM
>> To: Penny, Zheng <penny.zheng@amd.com>
>> Cc: Huang, Ray <Ray.Huang@amd.com>; Andrew Cooper
>> <andrew.cooper3@citrix.com>; Roger Pau Monné <roger.pau@citrix.com>;
>> Anthony PERARD <anthony.perard@vates.tech>; Orzel, Michal
>> <Michal.Orzel@amd.com>; Julien Grall <julien@xen.org>; Stefano Stabellini
>> <sstabellini@kernel.org>; xen-devel@lists.xenproject.org
>> Subject: Re: [PATCH v3 02/15] xen/x86: introduce new sub-hypercall to propagate
>> CPPC data
>>
>> On 25.03.2025 05:12, Penny, Zheng wrote:
>>>> -----Original Message-----
>>>> From: Jan Beulich <jbeulich@suse.com>
>>>> Sent: Monday, March 24, 2025 10:28 PM
>>>>
>>>> On 06.03.2025 09:39, Penny Zheng wrote:
>>>>> + pm_info = processor_pminfo[cpuid];
>>>>> + /* Must already allocated in set_psd_pminfo */
>>>>> + if ( !pm_info )
>>>>> + {
>>>>> + ret = -EINVAL;
>>>>> + goto out;
>>>>> + }
>>>>> + pm_info->cppc_data = *cppc_data;
>>>>> +
>>>>> + if ( cpufreq_verbose )
>>>>> + print_CPPC(&pm_info->cppc_data);
>>>>> +
>>>>> + pm_info->init = XEN_CPPC_INIT;
>>>>
>>>> That is - whichever Dom0 invoked last will have data recorded, and
>>>> the other effectively is discarded? I think a warning (perhaps a
>>>> one-time one) is minimally needed to diagnose the case where one type of
>> data replaces the other.
>>>>
>>>
>>> In last v2 discussion, we are discussing that either set_px_pminfo or
>>> set_cppc_pminfo shall be invoked, which means either PX data is recorded, or
>> CPPC data is recorded.
>>> Current logic is that, cpufreq cmdline logic will set the
>>> XEN_PROCESSOR_PM_PX/CPPC flag to reflect user preference, if user
>>> defines the fallback option, like "cpufreq=amd-cppc,xen", we will have both
>> XEN_PROCESSOR_PM_PX | XEN_PROCESSOR_PM_CPPC set in the
>> beginning.
>>> Later in cpufreq driver register logic, as only one register could be
>>> registered , if amd-cppc being registered successfully, it will clear the
>> XEN_PROCESSOR_PM_PX flag bit.
>>> But if it fails to register, fallback scheme kicks off, we will try
>>> the legacy P-states, in the mean time, clearing the
>> XEN_PROCESSOR_PM_CPPC.
>>> We are trying to make XEN_PROCESSOR_PM_PX and
>> XEN_PROCESSOR_PM_CPPC
>>> exclusive values after driver registration, which will ensure us that
>>> either set_px_pminfo or set_cppc_pminfo is taken in the runtime.
>>
>> Yet you realize that this implies Dom0 to know what configuration Xen uses, in
>> order to know which data to upload. The best approach might be to have
>> Dom0 upload all data it has, with us merely ignoring what we can't make use of.
>
> PLZ correct me if I understand you wrongly:
> Right now, I was letting DOM0 upload all data it has, and in the Xen:
> ```
> case XEN_PM_CPPC:
> if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CPPC) )
> {
> ret = -EOPNOTSUPPED;
> break;
> }
> ret = set_cppc_pminfo(op->u.set_pminfo.id,
> &op->u.set_pminfo.u.cppc_data);
> break;
>
> case XEN_PM_PX:
> if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
> {
> ret = -EOPNOTSUPPED;
> break;
> }
> ret = set_px_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.u.perf);
> break;
> ```
> I relied on flag XEN_PROCESSOR_PM_CPPC and XEN_PROCESSOR_PM_PX to choose which
> info we shall record.
> Firstly, we shall not return -EOPNOTSUPPED error above there.
Yes.
>> The order of uploading (CPPC first or CPPC last) shouldn't matter. Then (and only
>> then, and - ftaod - only when uploading of the "wrong" kind of data doesn't result in
>> an error) things can go without warning.
>
> Then in
> ```
> pm_info->init = XEN_CPPC_INIT;
> ret = cpufreq_cpu_init(cpuid);
> ```
> We shall add warning here to clarify no fallback scheme to replace now, when ret is not zero.
Maybe. In the earlier reply I said with certain conditions fulfilled a warning
may not be necessary. Yet perhaps initially having a warning there (maybe just
for debug builds) may make sense.
Jan
© 2016 - 2026 Red Hat, Inc.