X-ZM-MESSAGEID: 1738830846451019000 amd-cppc is the AMD CPU performance scaling driver that introduces a new CPU frequency control mechanism, first on the AMD Zen-based CPU series. The new mechanism is based on Collaborative Processor Performance Control (CPPC), which provides finer-grained frequency management than legacy ACPI hardware P-States. Current AMD CPU platforms are using the ACPI P-states driver to manage CPU frequency and clocks, switching among only 3 P-states. The new amd-cppc allows a more flexible, low-latency interface for Xen to directly communicate the performance hints to hardware. The first version "amd-cppc" could leverage common governors such as *ondemand*, *performance*, etc, to manage the performance hints. In the future, we will introduce an advanced active mode to enable autonomous performance level selection. Signed-off-by: Penny Zheng --- v1 -> v2: - re-construct union caps and req to have anonymous struct instead - avoid "else" when the earlier if() ends in an unconditional control flow statement - Add check to avoid chopping off set bits from cast - make pointers pointer-to-const wherever possible - remove noisy log - exclude families before 0x17 before CPPC-feature MSR op - remove useless variable helpers - use xvzalloc and XVFREE - refactor error handling as ENABLE bit can only be cleared by reset --- xen/arch/x86/acpi/cpufreq/amd-cppc.c | 388 +++++++++++++++++++++++++++ 1 file changed, 388 insertions(+) diff --git a/xen/arch/x86/acpi/cpufreq/amd-cppc.c b/xen/arch/x86/acpi/cpufr= eq/amd-cppc.c index 2dca4a00f3..f14e7a6638 100644 --- a/xen/arch/x86/acpi/cpufreq/amd-cppc.c +++ b/xen/arch/x86/acpi/cpufreq/amd-cppc.c @@ -13,7 +13,61 @@ =20 #include #include +#include +#include #include +#include + +#define MSR_AMD_CPPC_CAP1 0xc00102b0 +#define MSR_AMD_CPPC_ENABLE 0xc00102b1 +#define AMD_CPPC_ENABLE BIT(0, ULL) +#define MSR_AMD_CPPC_REQ 0xc00102b3 + +#define amd_cppc_err(cpu, fmt, args...) 
\ + printk(XENLOG_ERR "AMD_CPPC: CPU%u error: " fmt, cpu, ## args) +#define amd_cppc_verbose(fmt, args...) \ +({ \ + if ( cpufreq_verbose ) \ + printk(XENLOG_DEBUG "AMD_CPPC: " fmt, ## args); \ +}) +#define amd_cppc_warn(fmt, args...) \ + printk(XENLOG_WARNING "AMD_CPPC: CPU%u warning: " fmt, cpu, ## args) + +struct amd_cppc_drv_data +{ + struct xen_processor_cppc *cppc_data; + union + { + uint64_t raw; + struct + { + unsigned int lowest_perf:8; + unsigned int lowest_nonlinear_perf:8; + unsigned int nominal_perf:8; + unsigned int highest_perf:8; + unsigned int :32; + }; + } caps; + union + { + uint64_t raw; + struct + { + unsigned int max_perf:8; + unsigned int min_perf:8; + unsigned int des_perf:8; + unsigned int epp:8; + unsigned int :32; + }; + } req; + int err; + + uint32_t max_freq; + uint32_t min_freq; + uint32_t nominal_freq; +}; + +static DEFINE_PER_CPU_READ_MOSTLY(struct amd_cppc_drv_data *, amd_cppc_drv= _data); =20 static bool __init amd_cppc_handle_option(const char *s, const char *end) { @@ -50,9 +104,343 @@ int __init amd_cppc_cmdline_parse(const char *s, const= char *e) return 0; } =20 +/* + * If CPPC lowest_freq and nominal_freq registers are exposed then we can + * use them to convert perf to freq and vice versa. 
The conversion is + * extrapolated as an affine function passing by the 2 points: + * - (Low perf, Low freq) + * - (Nominal perf, Nominal freq) + */ +static int amd_cppc_khz_to_perf(const struct amd_cppc_drv_data *data, unsi= gned int freq, uint8_t *perf) +{ + const struct xen_processor_cppc *cppc_data =3D data->cppc_data; + uint64_t mul, div, offset =3D 0, res; + + if ( freq =3D=3D (cppc_data->nominal_freq * 1000) ) + { + *perf =3D data->caps.nominal_perf; + return 0; + } + + if ( freq =3D=3D (cppc_data->lowest_freq * 1000) ) + { + *perf =3D data->caps.lowest_perf; + return 0; + } + + if ( (cppc_data->lowest_freq) && (cppc_data->nominal_freq) ) + { + mul =3D data->caps.nominal_perf - data->caps.lowest_perf; + div =3D cppc_data->nominal_freq - cppc_data->lowest_freq; + /* + * We don't need to convert to kHz for computing offset and can + * directly use nominal_freq and lowest_freq as the division + * will remove the frequency unit. + */ + div =3D div ?: 1; + offset =3D data->caps.nominal_perf - (mul * cppc_data->nominal_fre= q) / div; + } + else + { + /* Read Processor Max Speed(mhz) as anchor point */ + mul =3D data->caps.highest_perf; + div =3D this_cpu(max_freq_mhz); + if ( !div ) + return -EINVAL; + } + + res =3D offset + (mul * freq) / (div * 1000); + if ( res > UINT8_MAX ) + { + printk(XENLOG_ERR "Perf value exceeds maximum value 255: %lu\n", r= es); + return -EINVAL; + } + *perf =3D (uint8_t)res; + + return 0; +} + +static int amd_get_min_freq(const struct amd_cppc_drv_data *data, unsigned= int *min_freq) +{ + const struct xen_processor_cppc *cppc_data =3D data->cppc_data; + uint64_t mul, div, res; + + if ( cppc_data->lowest_freq ) + { + /* Switch to khz */ + *min_freq =3D cppc_data->lowest_freq * 1000; + return 0; + } + + /* Read Processor Max Speed(mhz) as anchor point */ + mul =3D this_cpu(max_freq_mhz); + div =3D data->caps.highest_perf; + res =3D (mul * data->caps.lowest_perf * 1000) / div; + if ( res > UINT_MAX ) + { + printk(XENLOG_ERR "Min freq 
exceeds maximum value UINT_MAX: %lu\n"= , res); + return -EINVAL; + } + + *min_freq =3D (unsigned int)res; + return 0; +} + +static int amd_get_nominal_freq(const struct amd_cppc_drv_data *data, unsi= gned int *nom_freq) +{ + const struct xen_processor_cppc *cppc_data =3D data->cppc_data; + uint64_t mul, div, res; + + if ( cppc_data->nominal_freq ) + { + /* Switch to khz */ + *nom_freq =3D cppc_data->nominal_freq * 1000; + return 0; + } + + /* Read Processor Max Speed(mhz) as anchor point */ + mul =3D this_cpu(max_freq_mhz); + div =3D data->caps.highest_perf; + res =3D (mul * data->caps.nominal_perf * 1000) / div; + if ( res > UINT_MAX ) + { + printk(XENLOG_ERR "Nominal freq exceeds maximum value UINT_MAX: %l= u\n", res); + return -EINVAL; + } + + *nom_freq =3D (unsigned int)res; + return 0; +} + +static int amd_get_max_freq(const struct amd_cppc_drv_data *data, unsigned= int *max_freq) +{ + unsigned int nom_freq, boost_ratio; + int res; + + res =3D amd_get_nominal_freq(data, &nom_freq); + if ( res ) + return res; + + boost_ratio =3D (unsigned int)(data->caps.highest_perf / data->caps.nominal_perf); + *max_freq =3D nom_freq * boost_ratio; + + return 0; +} + +static int cf_check amd_cppc_cpufreq_verify(struct cpufreq_policy *policy) +{ + const struct amd_cppc_drv_data *data =3D per_cpu(amd_cppc_drv_data, po= licy->cpu); + + cpufreq_verify_within_limits(policy, data->min_freq, data->max_freq); + + return 0; +} + +static void amd_cppc_write_request_msrs(void *info) +{ + struct amd_cppc_drv_data *data =3D info; + + if ( wrmsr_safe(MSR_AMD_CPPC_REQ, data->req.raw) ) + { + data->err =3D -EINVAL; + return; + } + data->err =3D 0; +} + +static int cf_check amd_cppc_write_request(int cpu, uint8_t min_perf, + uint8_t des_perf, uint8_t max_p= erf) +{ + struct amd_cppc_drv_data *data =3D per_cpu(amd_cppc_drv_data, cpu); + uint64_t prev =3D data->req.raw; + + data->req.min_perf =3D min_perf; + data->req.max_perf =3D max_perf; + data->req.des_perf =3D des_perf; + + if ( prev =3D=3D 
data->req.raw ) + return 0; + + on_selected_cpus(cpumask_of(cpu), amd_cppc_write_request_msrs, data, 1= ); + + return data->err; +} + +static int cf_check amd_cppc_cpufreq_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + unsigned int cpu =3D policy->cpu; + const struct amd_cppc_drv_data *data =3D per_cpu(amd_cppc_drv_data, cp= u); + uint8_t des_perf; + int res; + + if ( unlikely(!target_freq) ) + return 0; + + res =3D amd_cppc_khz_to_perf(data, target_freq, &des_perf); + if ( res ) + return res; + + return amd_cppc_write_request(policy->cpu, data->caps.lowest_nonlinear= _perf, + des_perf, data->caps.highest_perf); +} + +static void cf_check amd_cppc_init_msrs(void *info) +{ + struct cpufreq_policy *policy =3D info; + struct amd_cppc_drv_data *data =3D this_cpu(amd_cppc_drv_data); + uint64_t val; + unsigned int min_freq, nominal_freq, max_freq; + const struct cpuinfo_x86 *c =3D cpu_data + policy->cpu; + + /* Feature CPPC was first introduced on Zen2 */ + if ( c->x86 < 0x17 ) + { + amd_cppc_err(policy->cpu, "Unsupported cpu family: %x\n", c->x86); + data->err =3D -EOPNOTSUPP; + return; + } + + /* Package level MSR */ + if ( rdmsr_safe(MSR_AMD_CPPC_ENABLE, val) ) + { + amd_cppc_err(policy->cpu, "rdmsr_safe(MSR_AMD_CPPC_ENABLE)\n"); + goto err; + } + + /* + * Only when Enable bit is on, the hardware will calculate the process= or=E2=80=99s + * performance capabilities and initialize the performance level field= s in + * the CPPC capability registers. 
+ */ + if ( !(val & AMD_CPPC_ENABLE) ) + { + val |=3D AMD_CPPC_ENABLE; + if ( wrmsr_safe(MSR_AMD_CPPC_ENABLE, val) ) + { + amd_cppc_err(policy->cpu, "wrmsr_safe(MSR_AMD_CPPC_ENABLE, %lx= )\n", val); + goto err; + } + } + + if ( rdmsr_safe(MSR_AMD_CPPC_CAP1, data->caps.raw) ) + { + amd_cppc_err(policy->cpu, "rdmsr_safe(MSR_AMD_CPPC_CAP1)\n"); + goto err; + } + + if ( data->caps.highest_perf =3D=3D 0 || data->caps.lowest_perf =3D=3D= 0 || + data->caps.nominal_perf =3D=3D 0 || data->caps.lowest_nonlinear_p= erf =3D=3D 0 ) + { + amd_cppc_err(policy->cpu, + "Platform malfunction, read CPPC highest_perf: %u, lo= west_perf: %u, nominal_perf: %u, lowest_nonlinear_perf: %u zero value\n", + data->caps.highest_perf, data->caps.lowest_perf, + data->caps.nominal_perf, data->caps.lowest_nonlinear_= perf); + goto err; + } + + data->err =3D amd_get_min_freq(data, &min_freq); + if ( data->err ) + return; + + data->err =3D amd_get_nominal_freq(data, &nominal_freq); + if ( data->err ) + return; + + data->err =3D amd_get_max_freq(data, &max_freq); + if ( data->err ) + return; + + if ( min_freq > max_freq ) + { + amd_cppc_err(policy->cpu, "min_freq(%u) or max_freq(%u) value is i= ncorrect\n", + min_freq, max_freq); + goto err; + } + + policy->min =3D min_freq; + policy->max =3D max_freq; + + policy->cpuinfo.min_freq =3D min_freq; + policy->cpuinfo.max_freq =3D max_freq; + policy->cpuinfo.perf_freq =3D nominal_freq; + policy->cur =3D nominal_freq; + + /* Initial processor data capability frequencies */ + data->min_freq =3D min_freq; + data->nominal_freq =3D nominal_freq; + data->max_freq =3D max_freq; + + return; + + err: + data->err =3D -EINVAL; +} + +/* + * The new AMD CPPC driver is different than legacy ACPI hardware P-State, + * which has a finer grain frequency range between the highest and lowest + * frequency. And boost frequency is actually the frequency which is mappe= d on + * highest performance ratio. 
The legacy P0 frequency is actually mapped on + * nominal performance ratio. + */ +static void amd_cppc_boost_init(struct cpufreq_policy *policy, const struc= t amd_cppc_drv_data *data) +{ + if ( data->caps.highest_perf <=3D data->caps.nominal_perf ) + return; + + policy->turbo =3D CPUFREQ_TURBO_ENABLED; +} + +static int cf_check amd_cppc_cpufreq_cpu_init(struct cpufreq_policy *polic= y) +{ + unsigned int cpu =3D policy->cpu; + struct amd_cppc_drv_data *data; + + data =3D xvzalloc(struct amd_cppc_drv_data); + if ( !data ) + return -ENOMEM; + + data->cppc_data =3D &processor_pminfo[cpu]->cppc_data; + + per_cpu(amd_cppc_drv_data, cpu) =3D data; + + on_selected_cpus(cpumask_of(cpu), amd_cppc_init_msrs, policy, 1); + + if ( data->err ) + { + amd_cppc_err(cpu, "Could not initialize AMD CPPC MSR properly\n"); + per_cpu(amd_cppc_drv_data, cpu) =3D NULL; + XVFREE(data); + return -ENODEV; + } + + policy->governor =3D cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR; + + amd_cppc_boost_init(policy, data); + + amd_cppc_verbose("CPU %u initialized with amd-cppc passive mode\n", po= licy->cpu); + return 0; +} + +static int cf_check amd_cppc_cpufreq_cpu_exit(struct cpufreq_policy *polic= y) +{ + struct amd_cppc_drv_data *data =3D per_cpu(amd_cppc_drv_data, policy->= cpu); + + per_cpu(amd_cppc_drv_data, policy->cpu) =3D NULL; + XVFREE(data); + + return 0; +} + static const struct cpufreq_driver __initconst_cf_clobber amd_cppc_cpufreq= _driver =3D { .name =3D XEN_AMD_CPPC_DRIVER_NAME, + .verify =3D amd_cppc_cpufreq_verify, + .target =3D amd_cppc_cpufreq_target, + .init =3D amd_cppc_cpufreq_cpu_init, + .exit =3D amd_cppc_cpufreq_cpu_exit, }; =20 int __init amd_cppc_register_driver(void) --=20 2.34.1