From nobody Tue Dec 23 12:36:58 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 7B40DC0015E for ; Fri, 21 Jul 2023 15:50:43 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S232348AbjGUPum (ORCPT ); Fri, 21 Jul 2023 11:50:42 -0400 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:50114 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S232314AbjGUPu2 (ORCPT ); Fri, 21 Jul 2023 11:50:28 -0400 Received: from foss.arm.com (foss.arm.com [217.140.110.172]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 7A27A359D; Fri, 21 Jul 2023 08:50:19 -0700 (PDT) Received: from usa-sjc-imap-foss1.foss.arm.com (unknown [10.121.207.14]) by usa-sjc-mx-foss1.foss.arm.com (Postfix) with ESMTP id 1883914BF; Fri, 21 Jul 2023 08:51:02 -0700 (PDT) Received: from e129166.arm.com (unknown [10.57.0.79]) by usa-sjc-imap-foss1.foss.arm.com (Postfix) with ESMTPA id B9A423F844; Fri, 21 Jul 2023 08:50:15 -0700 (PDT) From: Lukasz Luba To: linux-kernel@vger.kernel.org, linux-pm@vger.kernel.org, rafael@kernel.org Cc: lukasz.luba@arm.com, dietmar.eggemann@arm.com, rui.zhang@intel.com, amit.kucheria@verdurent.com, amit.kachhap@gmail.com, daniel.lezcano@linaro.org, viresh.kumar@linaro.org, len.brown@intel.com, pavel@ucw.cz, Pierre.Gondois@arm.com, ionela.voinescu@arm.com, mhiramat@kernel.org Subject: [PATCH v3 06/12] PM: EM: Refactor struct em_perf_domain and add default_table Date: Fri, 21 Jul 2023 16:50:16 +0100 Message-Id: <20230721155022.2339982-7-lukasz.luba@arm.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: <20230721155022.2339982-1-lukasz.luba@arm.com> References: <20230721155022.2339982-1-lukasz.luba@arm.com> MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: 
linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" The Energy Model is going to support runtime modifications. Refactor the old implementation, which accessed struct em_perf_state directly, and introduce em_perf_domain::default_table to clean up the design. This new field will help to better distinguish the two performance state tables. Update all drivers and frameworks which used the old field, em_perf_domain::table, so that they now use em_perf_domain::default_table. Signed-off-by: Lukasz Luba --- drivers/powercap/dtpm_cpu.c | 27 +++++++++++++++++++-------- drivers/powercap/dtpm_devfreq.c | 23 ++++++++++++++++------- drivers/thermal/cpufreq_cooling.c | 23 +++++++++++++++-------- drivers/thermal/devfreq_cooling.c | 23 +++++++++++++++++------ include/linux/energy_model.h | 14 ++++++++++++-- kernel/power/energy_model.c | 22 ++++++++++++++++++---- 6 files changed, 97 insertions(+), 35 deletions(-) diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c index 2ff7717530bf..743a0ac8ecdf 100644 --- a/drivers/powercap/dtpm_cpu.c +++ b/drivers/powercap/dtpm_cpu.c @@ -43,6 +43,7 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 powe= r_limit) { struct dtpm_cpu *dtpm_cpu =3D to_dtpm_cpu(dtpm); struct em_perf_domain *pd =3D em_cpu_get(dtpm_cpu->cpu); + struct em_perf_state *table; struct cpumask cpus; unsigned long freq; u64 power; @@ -51,19 +52,21 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 po= wer_limit) cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus)); nr_cpus =3D cpumask_weight(&cpus); =20 + table =3D pd->default_table->state; + for (i =3D 0; i < pd->nr_perf_states; i++) { =20 - power =3D pd->table[i].power * nr_cpus; + power =3D table[i].power * nr_cpus; =20 if (power > power_limit) break; } =20 - freq =3D pd->table[i - 1].frequency; + freq =3D table[i - 1].frequency; =20 freq_qos_update_request(&dtpm_cpu->qos_req, freq); =20 - power_limit =3D pd->table[i - 1].power * nr_cpus; + power_limit =3D table[i - 1].power * nr_cpus; =20
return power_limit; } @@ -88,12 +91,14 @@ static u64 scale_pd_power_uw(struct cpumask *pd_mask, u= 64 power) static u64 get_pd_power_uw(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu =3D to_dtpm_cpu(dtpm); + struct em_perf_state *table; struct em_perf_domain *pd; struct cpumask *pd_mask; unsigned long freq; int i; =20 pd =3D em_cpu_get(dtpm_cpu->cpu); + table =3D pd->default_table->state; =20 pd_mask =3D em_span_cpus(pd); =20 @@ -101,10 +106,10 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) =20 for (i =3D 0; i < pd->nr_perf_states; i++) { =20 - if (pd->table[i].frequency < freq) + if (table[i].frequency < freq) continue; =20 - return scale_pd_power_uw(pd_mask, pd->table[i].power * + return scale_pd_power_uw(pd_mask, table[i].power * MICROWATT_PER_MILLIWATT); } =20 @@ -115,17 +120,20 @@ static int update_pd_power_uw(struct dtpm *dtpm) { struct dtpm_cpu *dtpm_cpu =3D to_dtpm_cpu(dtpm); struct em_perf_domain *em =3D em_cpu_get(dtpm_cpu->cpu); + struct em_perf_state *table; struct cpumask cpus; int nr_cpus; =20 cpumask_and(&cpus, cpu_online_mask, to_cpumask(em->cpus)); nr_cpus =3D cpumask_weight(&cpus); =20 - dtpm->power_min =3D em->table[0].power; + table =3D em->default_table->state; + + dtpm->power_min =3D table[0].power; dtpm->power_min *=3D MICROWATT_PER_MILLIWATT; dtpm->power_min *=3D nr_cpus; =20 - dtpm->power_max =3D em->table[em->nr_perf_states - 1].power; + dtpm->power_max =3D table[em->nr_perf_states - 1].power; dtpm->power_max *=3D MICROWATT_PER_MILLIWATT; dtpm->power_max *=3D nr_cpus; =20 @@ -182,6 +190,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *paren= t) { struct dtpm_cpu *dtpm_cpu; struct cpufreq_policy *policy; + struct em_perf_state *table; struct em_perf_domain *pd; char name[CPUFREQ_NAME_LEN]; int ret =3D -ENOMEM; @@ -198,6 +207,8 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *paren= t) if (!pd || em_is_artificial(pd)) return -EINVAL; =20 + table =3D pd->default_table->state; + dtpm_cpu =3D kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL); 
if (!dtpm_cpu) return -ENOMEM; @@ -216,7 +227,7 @@ static int __dtpm_cpu_setup(int cpu, struct dtpm *paren= t) =20 ret =3D freq_qos_add_request(&policy->constraints, &dtpm_cpu->qos_req, FREQ_QOS_MAX, - pd->table[pd->nr_perf_states - 1].frequency); + table[pd->nr_perf_states - 1].frequency); if (ret) goto out_dtpm_unregister; =20 diff --git a/drivers/powercap/dtpm_devfreq.c b/drivers/powercap/dtpm_devfre= q.c index 91276761a31d..6ef0f2b4a683 100644 --- a/drivers/powercap/dtpm_devfreq.c +++ b/drivers/powercap/dtpm_devfreq.c @@ -37,11 +37,14 @@ static int update_pd_power_uw(struct dtpm *dtpm) struct devfreq *devfreq =3D dtpm_devfreq->devfreq; struct device *dev =3D devfreq->dev.parent; struct em_perf_domain *pd =3D em_pd_get(dev); + struct em_perf_state *table; =20 - dtpm->power_min =3D pd->table[0].power; + table =3D pd->default_table->state; + + dtpm->power_min =3D table[0].power; dtpm->power_min *=3D MICROWATT_PER_MILLIWATT; =20 - dtpm->power_max =3D pd->table[pd->nr_perf_states - 1].power; + dtpm->power_max =3D table[pd->nr_perf_states - 1].power; dtpm->power_max *=3D MICROWATT_PER_MILLIWATT; =20 return 0; @@ -53,22 +56,25 @@ static u64 set_pd_power_limit(struct dtpm *dtpm, u64 po= wer_limit) struct devfreq *devfreq =3D dtpm_devfreq->devfreq; struct device *dev =3D devfreq->dev.parent; struct em_perf_domain *pd =3D em_pd_get(dev); + struct em_perf_state *table; unsigned long freq; u64 power; int i; =20 + table =3D pd->default_table->state; + for (i =3D 0; i < pd->nr_perf_states; i++) { =20 - power =3D pd->table[i].power * MICROWATT_PER_MILLIWATT; + power =3D table[i].power * MICROWATT_PER_MILLIWATT; if (power > power_limit) break; } =20 - freq =3D pd->table[i - 1].frequency; + freq =3D table[i - 1].frequency; =20 dev_pm_qos_update_request(&dtpm_devfreq->qos_req, freq); =20 - power_limit =3D pd->table[i - 1].power * MICROWATT_PER_MILLIWATT; + power_limit =3D table[i - 1].power * MICROWATT_PER_MILLIWATT; =20 return power_limit; } @@ -94,6 +100,7 @@ static u64 
get_pd_power_uw(struct dtpm *dtpm) struct device *dev =3D devfreq->dev.parent; struct em_perf_domain *pd =3D em_pd_get(dev); struct devfreq_dev_status status; + struct em_perf_state *table; unsigned long freq; u64 power; int i; @@ -102,15 +109,17 @@ static u64 get_pd_power_uw(struct dtpm *dtpm) status =3D devfreq->last_status; mutex_unlock(&devfreq->lock); =20 + table =3D pd->default_table->state; + freq =3D DIV_ROUND_UP(status.current_frequency, HZ_PER_KHZ); _normalize_load(&status); =20 for (i =3D 0; i < pd->nr_perf_states; i++) { =20 - if (pd->table[i].frequency < freq) + if (table[i].frequency < freq) continue; =20 - power =3D pd->table[i].power * MICROWATT_PER_MILLIWATT; + power =3D table[i].power * MICROWATT_PER_MILLIWATT; power *=3D status.busy_time; power >>=3D 10; =20 diff --git a/drivers/thermal/cpufreq_cooling.c b/drivers/thermal/cpufreq_co= oling.c index e2cc7bd30862..1d979c5e05ed 100644 --- a/drivers/thermal/cpufreq_cooling.c +++ b/drivers/thermal/cpufreq_cooling.c @@ -91,10 +91,11 @@ struct cpufreq_cooling_device { static unsigned long get_level(struct cpufreq_cooling_device *cpufreq_cdev, unsigned int freq) { + struct em_perf_state *table =3D cpufreq_cdev->em->default_table->state; int i; =20 for (i =3D cpufreq_cdev->max_level - 1; i >=3D 0; i--) { - if (freq > cpufreq_cdev->em->table[i].frequency) + if (freq > table[i].frequency) break; } =20 @@ -104,15 +105,16 @@ static unsigned long get_level(struct cpufreq_cooling= _device *cpufreq_cdev, static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_cdev, u32 freq) { + struct em_perf_state *table =3D cpufreq_cdev->em->default_table->state; unsigned long power_mw; int i; =20 for (i =3D cpufreq_cdev->max_level - 1; i >=3D 0; i--) { - if (freq > cpufreq_cdev->em->table[i].frequency) + if (freq > table[i].frequency) break; } =20 - power_mw =3D cpufreq_cdev->em->table[i + 1].power; + power_mw =3D table[i + 1].power; power_mw /=3D MICROWATT_PER_MILLIWATT; =20 return power_mw; @@ -121,18 +123,19 @@ 
static u32 cpu_freq_to_power(struct cpufreq_cooling_d= evice *cpufreq_cdev, static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_cdev, u32 power) { + struct em_perf_state *table =3D cpufreq_cdev->em->default_table->state; unsigned long em_power_mw; int i; =20 for (i =3D cpufreq_cdev->max_level; i > 0; i--) { /* Convert EM power to milli-Watts to make safe comparison */ - em_power_mw =3D cpufreq_cdev->em->table[i].power; + em_power_mw =3D table[i].power; em_power_mw /=3D MICROWATT_PER_MILLIWATT; if (power >=3D em_power_mw) break; } =20 - return cpufreq_cdev->em->table[i].frequency; + return table[i].frequency; } =20 /** @@ -262,8 +265,9 @@ static int cpufreq_get_requested_power(struct thermal_c= ooling_device *cdev, static int cpufreq_state2power(struct thermal_cooling_device *cdev, unsigned long state, u32 *power) { - unsigned int freq, num_cpus, idx; struct cpufreq_cooling_device *cpufreq_cdev =3D cdev->devdata; + unsigned int freq, num_cpus, idx; + struct em_perf_state *table; =20 /* Request state should be less than max_level */ if (state > cpufreq_cdev->max_level) @@ -271,8 +275,9 @@ static int cpufreq_state2power(struct thermal_cooling_d= evice *cdev, =20 num_cpus =3D cpumask_weight(cpufreq_cdev->policy->cpus); =20 + table =3D cpufreq_cdev->em->default_table->state; idx =3D cpufreq_cdev->max_level - state; - freq =3D cpufreq_cdev->em->table[idx].frequency; + freq =3D table[idx].frequency; *power =3D cpu_freq_to_power(cpufreq_cdev, freq) * num_cpus; =20 return 0; @@ -378,8 +383,10 @@ static unsigned int get_state_freq(struct cpufreq_cool= ing_device *cpufreq_cdev, #ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR /* Use the Energy Model table if available */ if (cpufreq_cdev->em) { + struct em_perf_state *table; + table =3D cpufreq_cdev->em->default_table->state; idx =3D cpufreq_cdev->max_level - state; - return cpufreq_cdev->em->table[idx].frequency; + return table[idx].frequency; } #endif =20 diff --git a/drivers/thermal/devfreq_cooling.c 
b/drivers/thermal/devfreq_co= oling.c index 262e62ab6cf2..4207ef850582 100644 --- a/drivers/thermal/devfreq_cooling.c +++ b/drivers/thermal/devfreq_cooling.c @@ -87,6 +87,7 @@ static int devfreq_cooling_set_cur_state(struct thermal_c= ooling_device *cdev, struct devfreq_cooling_device *dfc =3D cdev->devdata; struct devfreq *df =3D dfc->devfreq; struct device *dev =3D df->dev.parent; + struct em_perf_state *table; unsigned long freq; int perf_idx; =20 @@ -99,8 +100,9 @@ static int devfreq_cooling_set_cur_state(struct thermal_= cooling_device *cdev, return -EINVAL; =20 if (dfc->em_pd) { + table =3D dfc->em_pd->default_table->state; perf_idx =3D dfc->max_state - state; - freq =3D dfc->em_pd->table[perf_idx].frequency * 1000; + freq =3D table[perf_idx].frequency * 1000; } else { freq =3D dfc->freq_table[state]; } @@ -123,10 +125,11 @@ static int devfreq_cooling_set_cur_state(struct therm= al_cooling_device *cdev, */ static int get_perf_idx(struct em_perf_domain *em_pd, unsigned long freq) { + struct em_perf_state *table =3D em_pd->default_table->state; int i; =20 for (i =3D 0; i < em_pd->nr_perf_states; i++) { - if (em_pd->table[i].frequency =3D=3D freq) + if (table[i].frequency =3D=3D freq) return i; } =20 @@ -181,6 +184,7 @@ static int devfreq_cooling_get_requested_power(struct t= hermal_cooling_device *cd struct devfreq_cooling_device *dfc =3D cdev->devdata; struct devfreq *df =3D dfc->devfreq; struct devfreq_dev_status status; + struct em_perf_state *table; unsigned long state; unsigned long freq; unsigned long voltage; @@ -192,6 +196,8 @@ static int devfreq_cooling_get_requested_power(struct t= hermal_cooling_device *cd =20 freq =3D status.current_frequency; =20 + table =3D dfc->em_pd->default_table->state; + if (dfc->power_ops && dfc->power_ops->get_real_power) { voltage =3D get_voltage(df, freq); if (voltage =3D=3D 0) { @@ -204,7 +210,7 @@ static int devfreq_cooling_get_requested_power(struct t= hermal_cooling_device *cd state =3D dfc->capped_state; =20 /* 
Convert EM power into milli-Watts first */ - dfc->res_util =3D dfc->em_pd->table[state].power; + dfc->res_util =3D table[state].power; dfc->res_util /=3D MICROWATT_PER_MILLIWATT; =20 dfc->res_util *=3D SCALE_ERROR_MITIGATION; @@ -225,7 +231,7 @@ static int devfreq_cooling_get_requested_power(struct t= hermal_cooling_device *cd _normalize_load(&status); =20 /* Convert EM power into milli-Watts first */ - *power =3D dfc->em_pd->table[perf_idx].power; + *power =3D table[perf_idx].power; *power /=3D MICROWATT_PER_MILLIWATT; /* Scale power for utilization */ *power *=3D status.busy_time; @@ -245,13 +251,15 @@ static int devfreq_cooling_state2power(struct thermal= _cooling_device *cdev, unsigned long state, u32 *power) { struct devfreq_cooling_device *dfc =3D cdev->devdata; + struct em_perf_state *table; int perf_idx; =20 if (state > dfc->max_state) return -EINVAL; =20 + table =3D dfc->em_pd->default_table->state; perf_idx =3D dfc->max_state - state; - *power =3D dfc->em_pd->table[perf_idx].power; + *power =3D table[perf_idx].power; *power /=3D MICROWATT_PER_MILLIWATT; =20 return 0; @@ -264,6 +272,7 @@ static int devfreq_cooling_power2state(struct thermal_c= ooling_device *cdev, struct devfreq *df =3D dfc->devfreq; struct devfreq_dev_status status; unsigned long freq, em_power_mw; + struct em_perf_state *table; s32 est_power; int i; =20 @@ -273,6 +282,8 @@ static int devfreq_cooling_power2state(struct thermal_c= ooling_device *cdev, =20 freq =3D status.current_frequency; =20 + table =3D dfc->em_pd->default_table->state; + if (dfc->power_ops && dfc->power_ops->get_real_power) { /* Scale for resource utilization */ est_power =3D power * dfc->res_util; @@ -290,7 +301,7 @@ static int devfreq_cooling_power2state(struct thermal_c= ooling_device *cdev, */ for (i =3D dfc->max_state; i > 0; i--) { /* Convert EM power to milli-Watts to make safe comparison */ - em_power_mw =3D dfc->em_pd->table[i].power; + em_power_mw =3D table[i].power; em_power_mw /=3D MICROWATT_PER_MILLIWATT; 
if (est_power >=3D em_power_mw) break; diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index 8069f526c9d8..90c0822b664b 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -36,9 +36,19 @@ struct em_perf_state { */ #define EM_PERF_STATE_INEFFICIENT BIT(0) =20 +/** + * struct em_perf_table - Performance states table + * @state: List of performance states, in ascending order + * @rcu: RCU used for safe access and destruction + */ +struct em_perf_table { + struct em_perf_state *state; + struct rcu_head rcu; +}; + /** * struct em_perf_domain - Performance domain - * @table: List of performance states, in ascending order + * @default_table: Pointer to the default em_perf_table * @nr_perf_states: Number of performance states * @flags: See "em_perf_domain flags" * @cpus: Cpumask covering the CPUs of the domain. It's here @@ -53,7 +63,7 @@ struct em_perf_state { * field is unused. */ struct em_perf_domain { - struct em_perf_state *table; + struct em_perf_table *default_table; int nr_perf_states; unsigned long flags; unsigned long cpus[]; diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c index 5ecb73b36995..6cd94f92701d 100644 --- a/kernel/power/energy_model.c +++ b/kernel/power/energy_model.c @@ -66,6 +66,7 @@ DEFINE_SHOW_ATTRIBUTE(em_debug_flags); =20 static void em_debug_create_pd(struct device *dev) { + struct em_perf_table *table =3D dev->em_pd->default_table; struct dentry *d; int i; =20 @@ -81,7 +82,7 @@ static void em_debug_create_pd(struct device *dev) =20 /* Create a sub-directory for each performance state */ for (i =3D 0; i < dev->em_pd->nr_perf_states; i++) - em_debug_create_ps(&dev->em_pd->table[i], d); + em_debug_create_ps(&table->state[i], d); =20 } =20 @@ -196,7 +197,7 @@ static int em_create_perf_table(struct device *dev, str= uct em_perf_domain *pd, if (ret) goto free_ps_table; =20 - pd->table =3D table; + pd->default_table->state =3D table; pd->nr_perf_states =3D nr_states; =20 
return 0; @@ -210,6 +211,7 @@ static int em_create_pd(struct device *dev, int nr_stat= es, struct em_data_callback *cb, cpumask_t *cpus, unsigned long flags) { + struct em_perf_table *default_table; struct em_perf_domain *pd; struct device *cpu_dev; int cpu, ret, num_cpus; @@ -234,8 +236,17 @@ static int em_create_pd(struct device *dev, int nr_sta= tes, return -ENOMEM; } =20 + default_table =3D kzalloc(sizeof(*default_table), GFP_KERNEL); + if (!default_table) { + kfree(pd); + return -ENOMEM; + } + + pd->default_table =3D default_table; + ret =3D em_create_perf_table(dev, pd, nr_states, cb, flags); if (ret) { + kfree(default_table); kfree(pd); return ret; } @@ -358,6 +369,7 @@ int em_dev_register_perf_domain(struct device *dev, uns= igned int nr_states, bool microwatts) { unsigned long cap, prev_cap =3D 0; + struct em_perf_state *table; unsigned long flags =3D 0; int cpu, ret; =20 @@ -416,7 +428,8 @@ int em_dev_register_perf_domain(struct device *dev, uns= igned int nr_states, =20 dev->em_pd->flags |=3D flags; =20 - em_cpufreq_update_efficiencies(dev, dev->em_pd->table); + table =3D dev->em_pd->default_table->state; + em_cpufreq_update_efficiencies(dev, table); =20 em_debug_create_pd(dev); dev_info(dev, "EM: created perf domain\n"); @@ -449,7 +462,8 @@ void em_dev_unregister_perf_domain(struct device *dev) mutex_lock(&em_pd_mutex); em_debug_remove_pd(dev); =20 - kfree(dev->em_pd->table); + kfree(pd->default_table->state); + kfree(pd->default_table); kfree(dev->em_pd); dev->em_pd =3D NULL; mutex_unlock(&em_pd_mutex); --=20 2.25.1