[PATCH v3 05/15] xen/x86: introduce "cpufreq=amd-cppc" xen cmdline

Penny Zheng posted 15 patches 3 days, 12 hours ago
[PATCH v3 05/15] xen/x86: introduce "cpufreq=amd-cppc" xen cmdline
Posted by Penny Zheng 3 days, 12 hours ago
Users need to set "cpufreq=amd-cppc" in xen cmdline to enable
amd-cppc driver, which selects ACPI Collaborative Performance
and Power Control (CPPC) on supported AMD hardware to provide a
finer grained frequency control mechanism.
`verbose` option can also be included to support verbose print.

When users setting "cpufreq=amd-cppc", a new amd-cppc driver
shall be registered and used. Actual implmentation will be introduced
in the following commits.

Xen is not expected to support both or mixed mode (CPPC & legacy PSS, _PCT,
_PPC) operations, only one cpufreq driver gets registerd, either amd-cppc or
legacy P-states driver, which is reflected and asserted by the incompatible
flags XEN_PROCESSOR_PM_PX and XEN_PROCESSOR_PM_CPPC.

Signed-off-by: Penny Zheng <Penny.Zheng@amd.com>
---
v1 -> v2:
- Obey to alphabetic sorting and also strict it with CONFIG_AMD
- Remove unnecessary empty comment line
- Use __initconst_cf_clobber for pre-filled structure cpufreq_driver
- Make new switch-case code apply to Hygon CPUs too
- Change ENOSYS with EOPNOTSUPP
- Blanks around binary operator
- Change all amd_/-pstate defined values to amd_/-cppc
---
v2 -> v3
- refactor too long lines
- Make sure XEN_PROCESSOR_PM_PX and XEN_PROCESSOR_PM_CPPC incompatible flags
after cpufreq register registrantion
---
 docs/misc/xen-command-line.pandoc         | 16 +++--
 xen/arch/x86/acpi/cpufreq/Makefile        |  1 +
 xen/arch/x86/acpi/cpufreq/acpi.c          | 12 +++-
 xen/arch/x86/acpi/cpufreq/amd-cppc.c      | 78 +++++++++++++++++++++++
 xen/arch/x86/acpi/cpufreq/cpufreq.c       | 34 +++++++++-
 xen/arch/x86/platform_hypercall.c         | 11 +++-
 xen/drivers/cpufreq/cpufreq.c             | 17 +++++
 xen/include/acpi/cpufreq/cpufreq.h        |  4 ++
 xen/include/acpi/cpufreq/processor_perf.h |  7 +-
 xen/include/public/sysctl.h               |  1 +
 10 files changed, 169 insertions(+), 12 deletions(-)
 create mode 100644 xen/arch/x86/acpi/cpufreq/amd-cppc.c

diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index a440042471..b3c3ca2377 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -515,7 +515,7 @@ If set, force use of the performance counters for oprofile, rather than detectin
 available support.
 
 ### cpufreq
-> `= none | {{ <boolean> | xen } { [:[powersave|performance|ondemand|userspace][,[<maxfreq>]][,[<minfreq>]]] } [,verbose]} | dom0-kernel | hwp[:[<hdc>][,verbose]]`
+> `= none | {{ <boolean> | xen } { [:[powersave|performance|ondemand|userspace][,[<maxfreq>]][,[<minfreq>]]] } [,verbose]} | dom0-kernel | hwp[:[<hdc>][,verbose]] | amd-cppc[:[verbose]]`
 
 > Default: `xen`
 
@@ -526,7 +526,7 @@ choice of `dom0-kernel` is deprecated and not supported by all Dom0 kernels.
 * `<maxfreq>` and `<minfreq>` are integers which represent max and min processor frequencies
   respectively.
 * `verbose` option can be included as a string or also as `verbose=<integer>`
-  for `xen`.  It is a boolean for `hwp`.
+  for `xen`.  It is a boolean for `hwp` and `amd-cppc`.
 * `hwp` selects Hardware-Controlled Performance States (HWP) on supported Intel
   hardware.  HWP is a Skylake+ feature which provides better CPU power
   management.  The default is disabled.  If `hwp` is selected, but hardware
@@ -534,13 +534,17 @@ choice of `dom0-kernel` is deprecated and not supported by all Dom0 kernels.
 * `<hdc>` is a boolean to enable Hardware Duty Cycling (HDC).  HDC enables the
   processor to autonomously force physical package components into idle state.
   The default is enabled, but the option only applies when `hwp` is enabled.
+* `amd-cppc` selects ACPI Collaborative Performance and Power Control (CPPC)
+  on supported AMD hardware to provide finer grained frequency control
+  mechanism. The default is disabled.
 
 User could use `;`-separated options to support universal options which they
 would like to try on any agnostic platform, *but* under priority order, like
-`cpufreq=hwp;xen,verbose`.  This first tries `hwp` and falls back to `xen` if
-unavailable.  Note: The `verbose` suboption is handled globally.  Setting it
-for either the primary or fallback option applies to both irrespective of where
-it is specified.
+`cpufreq=hwp;amd-cppc;xen,verbose`. This first tries `hwp` on Intel, or
+`amd-cppc` on AMD, and it will fall back to `xen` if unavailable. Note:
+The `verbose` suboption is handled globally.  Setting it for either the
+primary or fallback option applies to both irrespective of where it is
+specified.
 
 Note: grub2 requires to escape or quote ';', so `"cpufreq=hwp;xen"` should be
 specified within double quotes inside grub.cfg.  Refer to the grub2
diff --git a/xen/arch/x86/acpi/cpufreq/Makefile b/xen/arch/x86/acpi/cpufreq/Makefile
index e7dbe434a8..a2ba34bda0 100644
--- a/xen/arch/x86/acpi/cpufreq/Makefile
+++ b/xen/arch/x86/acpi/cpufreq/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_INTEL) += acpi.o
+obj-$(CONFIG_AMD) += amd-cppc.o
 obj-y += cpufreq.o
 obj-$(CONFIG_INTEL) += hwp.o
 obj-$(CONFIG_AMD) += powernow.o
diff --git a/xen/arch/x86/acpi/cpufreq/acpi.c b/xen/arch/x86/acpi/cpufreq/acpi.c
index 0cf94ab2d6..49c1795b25 100644
--- a/xen/arch/x86/acpi/cpufreq/acpi.c
+++ b/xen/arch/x86/acpi/cpufreq/acpi.c
@@ -13,6 +13,7 @@
 
 #include <xen/errno.h>
 #include <xen/delay.h>
+#include <xen/domain.h>
 #include <xen/param.h>
 #include <xen/types.h>
 
@@ -514,5 +515,14 @@ acpi_cpufreq_driver = {
 
 int __init acpi_cpufreq_register(void)
 {
-    return cpufreq_register_driver(&acpi_cpufreq_driver);
+    int ret;
+
+    ret = cpufreq_register_driver(&acpi_cpufreq_driver);
+    if ( ret )
+        return ret;
+
+    if ( IS_ENABLED(CONFIG_AMD) )
+        xen_processor_pmbits &= ~XEN_PROCESSOR_PM_CPPC;
+
+    return ret;
 }
diff --git a/xen/arch/x86/acpi/cpufreq/amd-cppc.c b/xen/arch/x86/acpi/cpufreq/amd-cppc.c
new file mode 100644
index 0000000000..7d482140a2
--- /dev/null
+++ b/xen/arch/x86/acpi/cpufreq/amd-cppc.c
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * amd-cppc.c - AMD Processor CPPC Frequency Driver
+ *
+ * Copyright (C) 2025 Advanced Micro Devices, Inc. All Rights Reserved.
+ *
+ * Author: Penny Zheng <penny.zheng@amd.com>
+ *
+ * AMD CPPC cpufreq driver introduces a new CPU performance scaling design
+ * for AMD processors using the ACPI Collaborative Performance and Power
+ * Control (CPPC) feature which provides finer grained frequency control range.
+ */
+
+#include <xen/domain.h>
+#include <xen/init.h>
+#include <xen/param.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+static bool __init amd_cppc_handle_option(const char *s, const char *end)
+{
+    int ret;
+
+    ret = parse_boolean("verbose", s, end);
+    if ( ret >= 0 )
+    {
+        cpufreq_verbose = ret;
+        return true;
+    }
+
+    return false;
+}
+
+int __init amd_cppc_cmdline_parse(const char *s, const char *e)
+{
+    do
+    {
+        const char *end = strpbrk(s, ",;");
+
+        if ( !amd_cppc_handle_option(s, end) )
+        {
+            printk(XENLOG_WARNING
+                   "cpufreq/amd-cppc: option '%.*s' not recognized\n",
+                   (int)((end ?: e) - s), s);
+
+            return -EINVAL;
+        }
+
+        s = end ? end + 1 : NULL;
+    } while ( s && s < e );
+
+    return 0;
+}
+
+static const struct cpufreq_driver __initconst_cf_clobber
+amd_cppc_cpufreq_driver =
+{
+    .name   = XEN_AMD_CPPC_DRIVER_NAME,
+};
+
+int __init amd_cppc_register_driver(void)
+{
+    int ret;
+
+    if ( !cpu_has_cppc )
+    {
+        xen_processor_pmbits &= ~XEN_PROCESSOR_PM_CPPC;
+        return -ENODEV;
+    }
+
+    ret = cpufreq_register_driver(&amd_cppc_cpufreq_driver);
+    if ( ret )
+        return ret;
+
+    /* Remove possible fallback option */
+    xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
+
+    return ret;
+}
diff --git a/xen/arch/x86/acpi/cpufreq/cpufreq.c b/xen/arch/x86/acpi/cpufreq/cpufreq.c
index 61e98b67bd..690a285f11 100644
--- a/xen/arch/x86/acpi/cpufreq/cpufreq.c
+++ b/xen/arch/x86/acpi/cpufreq/cpufreq.c
@@ -148,6 +148,10 @@ static int __init cf_check cpufreq_driver_init(void)
                 case CPUFREQ_none:
                     ret = 0;
                     break;
+                default:
+                    printk(XENLOG_WARNING
+                           "Unsupported cpufreq driver for vendor Intel\n");
+                    break;
                 }
 
                 if ( ret != -ENODEV )
@@ -157,7 +161,35 @@ static int __init cf_check cpufreq_driver_init(void)
 
         case X86_VENDOR_AMD:
         case X86_VENDOR_HYGON:
-            ret = IS_ENABLED(CONFIG_AMD) ? powernow_register_driver() : -ENODEV;
+            if ( !IS_ENABLED(CONFIG_AMD) )
+            {
+                ret = -ENODEV;
+                break;
+            }
+            ret = -ENOENT;
+
+            for ( unsigned int i = 0; i < cpufreq_xen_cnt; i++ )
+            {
+                switch ( cpufreq_xen_opts[i] )
+                {
+                case CPUFREQ_xen:
+                    ret = powernow_register_driver();
+                    break;
+                case CPUFREQ_amd_cppc:
+                    ret = amd_cppc_register_driver();
+                    break;
+                case CPUFREQ_none:
+                    ret = 0;
+                    break;
+                default:
+                    printk(XENLOG_WARNING
+                           "Unsupported cpufreq driver for vendor AMD\n");
+                    break;
+                }
+
+                if ( ret != -ENODEV )
+                    break;
+            }
             break;
         }
     }
diff --git a/xen/arch/x86/platform_hypercall.c b/xen/arch/x86/platform_hypercall.c
index 77390a0dbd..5dd1ba2949 100644
--- a/xen/arch/x86/platform_hypercall.c
+++ b/xen/arch/x86/platform_hypercall.c
@@ -542,6 +542,7 @@ ret_t do_platform_op(
                 ret = -ENOSYS;
                 break;
             }
+            ASSERT(!(xen_processor_pmbits & XEN_PROCESSOR_PM_CPPC));
             ret = set_px_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.u.perf);
             break;
  
@@ -572,7 +573,8 @@ ret_t do_platform_op(
             break;
         }
         case XEN_PM_PSD:
-            if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+            if ( !(xen_processor_pmbits & (XEN_PROCESSOR_PM_PX |
+                                           XEN_PROCESSOR_PM_CPPC)) )
             {
                 ret = -EOPNOTSUPP;
                 break;
@@ -584,6 +586,13 @@ ret_t do_platform_op(
             break;
 
         case XEN_PM_CPPC:
+            if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CPPC) )
+            {
+                ret = -EOPNOTSUPP;
+                break;
+            }
+            ASSERT(!(xen_processor_pmbits & XEN_PROCESSOR_PM_PX));
+
             ret = set_cppc_pminfo(op->u.set_pminfo.id,
                                   &op->u.set_pminfo.u.cppc_data);
             break;
diff --git a/xen/drivers/cpufreq/cpufreq.c b/xen/drivers/cpufreq/cpufreq.c
index cfae16c15f..792e4dc02c 100644
--- a/xen/drivers/cpufreq/cpufreq.c
+++ b/xen/drivers/cpufreq/cpufreq.c
@@ -111,6 +111,19 @@ static int __init cpufreq_cmdline_parse_hwp(const char *arg, const char *end)
     return ret;
 }
 
+static int __init cpufreq_cmdline_parse_cppc(const char *arg, const char *end)
+{
+    int ret = 0;
+
+    xen_processor_pmbits |= XEN_PROCESSOR_PM_CPPC;
+    cpufreq_controller = FREQCTL_xen;
+    cpufreq_xen_opts[cpufreq_xen_cnt++] = CPUFREQ_amd_cppc;
+    if ( arg[0] && arg[1] )
+        ret = amd_cppc_cmdline_parse(arg + 1, end);
+
+    return ret;
+}
+
 static int __init cf_check setup_cpufreq_option(const char *str)
 {
     const char *arg = strpbrk(str, ",:;");
@@ -159,6 +172,10 @@ static int __init cf_check setup_cpufreq_option(const char *str)
                   !cmdline_strcmp(str, "hwp") &&
                   !cpufreq_opts_contain(CPUFREQ_hwp) )
             ret = cpufreq_cmdline_parse_hwp(arg, end);
+        else if ( IS_ENABLED(CONFIG_AMD) && choice < 0 &&
+                  !cmdline_strcmp(str, "amd-cppc") &&
+                  !cpufreq_opts_contain(CPUFREQ_amd_cppc) )
+            ret = cpufreq_cmdline_parse_cppc(arg, end);
         else
             ret = -EINVAL;
 
diff --git a/xen/include/acpi/cpufreq/cpufreq.h b/xen/include/acpi/cpufreq/cpufreq.h
index 3f1b05a02e..a6fb10ea27 100644
--- a/xen/include/acpi/cpufreq/cpufreq.h
+++ b/xen/include/acpi/cpufreq/cpufreq.h
@@ -28,6 +28,7 @@ enum cpufreq_xen_opt {
     CPUFREQ_none,
     CPUFREQ_xen,
     CPUFREQ_hwp,
+    CPUFREQ_amd_cppc,
 };
 extern enum cpufreq_xen_opt cpufreq_xen_opts[2];
 extern unsigned int cpufreq_xen_cnt;
@@ -267,4 +268,7 @@ int set_hwp_para(struct cpufreq_policy *policy,
 
 int acpi_cpufreq_register(void);
 
+int amd_cppc_cmdline_parse(const char *s, const char *e);
+int amd_cppc_register_driver(void);
+
 #endif /* __XEN_CPUFREQ_PM_H__ */
diff --git a/xen/include/acpi/cpufreq/processor_perf.h b/xen/include/acpi/cpufreq/processor_perf.h
index 33edf112a0..ee12e0192b 100644
--- a/xen/include/acpi/cpufreq/processor_perf.h
+++ b/xen/include/acpi/cpufreq/processor_perf.h
@@ -6,9 +6,10 @@
 #include <xen/acpi.h>
 
 /* ability bits */
-#define XEN_PROCESSOR_PM_CX 1
-#define XEN_PROCESSOR_PM_PX 2
-#define XEN_PROCESSOR_PM_TX 4
+#define XEN_PROCESSOR_PM_CX     1
+#define XEN_PROCESSOR_PM_PX     2
+#define XEN_PROCESSOR_PM_TX     4
+#define XEN_PROCESSOR_PM_CPPC   8
 
 #define XEN_CPPC_INIT 0x40000000U
 #define XEN_PX_INIT   0x80000000U
diff --git a/xen/include/public/sysctl.h b/xen/include/public/sysctl.h
index b0fec271d3..42997252ef 100644
--- a/xen/include/public/sysctl.h
+++ b/xen/include/public/sysctl.h
@@ -423,6 +423,7 @@ struct xen_set_cppc_para {
     uint32_t activity_window;
 };
 
+#define XEN_AMD_CPPC_DRIVER_NAME "amd-cppc"
 #define XEN_HWP_DRIVER_NAME "hwp"
 
 /*
-- 
2.34.1