[RESEND][PATCH 3/8] PM: EM: Use the new .get_cost() callback while registering EM

Lukasz Luba posted 8 patches 2 years, 6 months ago
[RESEND][PATCH 3/8] PM: EM: Use the new .get_cost() callback while registering EM
Posted by Lukasz Luba 2 years, 6 months ago
The Energy Model (EM) allows to provide the 'cost' values when the device
driver provides the .get_cost() optional callback. This removes
restriction which is in the EM calculation function of the 'cost'
for each performance state. Now, the driver is in charge of providing
the right values which are then used by Energy Aware Scheduler.

Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
---
 kernel/power/energy_model.c | 39 ++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
index 6ecee99af842..95a3b33001f6 100644
--- a/kernel/power/energy_model.c
+++ b/kernel/power/energy_model.c
@@ -121,7 +121,8 @@ static void em_debug_remove_pd(struct device *dev) {}
 #endif
 
 static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
-				int nr_states, struct em_data_callback *cb)
+				int nr_states, struct em_data_callback *cb,
+				unsigned long flags)
 {
 	unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
 	struct em_perf_state *table;
@@ -173,10 +174,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
 	/* Compute the cost of each performance state. */
 	fmax = (u64) table[nr_states - 1].frequency;
 	for (i = nr_states - 1; i >= 0; i--) {
-		unsigned long power_res = em_scale_power(table[i].power);
+		unsigned long power_res, cost;
+
+		if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
+			ret = cb->get_cost(dev, table[i].frequency, &cost);
+			if (ret || !cost || cost > EM_MAX_POWER) {
+				dev_err(dev, "EM: invalid cost %lu %d\n",
+					cost, ret);
+				goto free_ps_table;
+			}
+		} else {
+			power_res = em_scale_power(table[i].power);
+			cost = div64_u64(fmax * power_res, table[i].frequency);
+		}
+
+		table[i].cost = cost;
 
-		table[i].cost = div64_u64(fmax * power_res,
-					  table[i].frequency);
 		if (table[i].cost >= prev_cost) {
 			table[i].flags = EM_PERF_STATE_INEFFICIENT;
 			dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
@@ -197,7 +210,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
 }
 
 static int em_create_pd(struct device *dev, int nr_states,
-			struct em_data_callback *cb, cpumask_t *cpus)
+			struct em_data_callback *cb, cpumask_t *cpus,
+			unsigned long flags)
 {
 	struct em_perf_domain *pd;
 	struct device *cpu_dev;
@@ -215,7 +229,7 @@ static int em_create_pd(struct device *dev, int nr_states,
 			return -ENOMEM;
 	}
 
-	ret = em_create_perf_table(dev, pd, nr_states, cb);
+	ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
 	if (ret) {
 		kfree(pd);
 		return ret;
@@ -332,6 +346,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 				bool milliwatts)
 {
 	unsigned long cap, prev_cap = 0;
+	unsigned long flags = 0;
 	int cpu, ret;
 
 	if (!dev || !nr_states || !cb)
@@ -378,14 +393,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
 		}
 	}
 
-	ret = em_create_pd(dev, nr_states, cb, cpus);
+	if (milliwatts)
+		flags |= EM_PERF_DOMAIN_MILLIWATTS;
+	else if (cb->get_cost)
+		flags |= EM_PERF_DOMAIN_ARTIFICIAL;
+
+	ret = em_create_pd(dev, nr_states, cb, cpus, flags);
 	if (ret)
 		goto unlock;
 
-	if (milliwatts)
-		dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
-	else if (cb->get_cost)
-		dev->em_pd->flags |= EM_PERF_DOMAIN_ARTIFICIAL;
+	dev->em_pd->flags |= flags;
 
 	em_cpufreq_update_efficiencies(dev);
 
-- 
2.17.1
Re: [RESEND][PATCH 3/8] PM: EM: Use the new .get_cost() callback while registering EM
Posted by Ionela Voinescu 2 years, 5 months ago
Hi Lukasz,

On Monday 21 Mar 2022 at 09:57:24 (+0000), Lukasz Luba wrote:
> The Energy Model (EM) allows to provide the 'cost' values when the device
> driver provides the .get_cost() optional callback. This removes
> restriction which is in the EM calculation function of the 'cost'
> for each performance state. Now, the driver is in charge of providing
> the right values which are then used by Energy Aware Scheduler.
> 
> Signed-off-by: Lukasz Luba <lukasz.luba@arm.com>
> ---
>  kernel/power/energy_model.c | 39 ++++++++++++++++++++++++++-----------
>  1 file changed, 28 insertions(+), 11 deletions(-)
> 
> diff --git a/kernel/power/energy_model.c b/kernel/power/energy_model.c
> index 6ecee99af842..95a3b33001f6 100644
> --- a/kernel/power/energy_model.c
> +++ b/kernel/power/energy_model.c
> @@ -121,7 +121,8 @@ static void em_debug_remove_pd(struct device *dev) {}
>  #endif
>  
>  static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
> -				int nr_states, struct em_data_callback *cb)
> +				int nr_states, struct em_data_callback *cb,
> +				unsigned long flags)
>  {
>  	unsigned long power, freq, prev_freq = 0, prev_cost = ULONG_MAX;
>  	struct em_perf_state *table;
> @@ -173,10 +174,22 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
>  	/* Compute the cost of each performance state. */
>  	fmax = (u64) table[nr_states - 1].frequency;
>  	for (i = nr_states - 1; i >= 0; i--) {
> -		unsigned long power_res = em_scale_power(table[i].power);
> +		unsigned long power_res, cost;
> +
> +		if (flags & EM_PERF_DOMAIN_ARTIFICIAL) {
> +			ret = cb->get_cost(dev, table[i].frequency, &cost);
> +			if (ret || !cost || cost > EM_MAX_POWER) {
> +				dev_err(dev, "EM: invalid cost %lu %d\n",
> +					cost, ret);
> +				goto free_ps_table;
> +			}
> +		} else {
> +			power_res = em_scale_power(table[i].power);
> +			cost = div64_u64(fmax * power_res, table[i].frequency);
> +		}
> +
> +		table[i].cost = cost;
>  
> -		table[i].cost = div64_u64(fmax * power_res,
> -					  table[i].frequency);
>  		if (table[i].cost >= prev_cost) {
>  			table[i].flags = EM_PERF_STATE_INEFFICIENT;
>  			dev_dbg(dev, "EM: OPP:%lu is inefficient\n",
> @@ -197,7 +210,8 @@ static int em_create_perf_table(struct device *dev, struct em_perf_domain *pd,
>  }
>  
>  static int em_create_pd(struct device *dev, int nr_states,
> -			struct em_data_callback *cb, cpumask_t *cpus)
> +			struct em_data_callback *cb, cpumask_t *cpus,
> +			unsigned long flags)
>  {
>  	struct em_perf_domain *pd;
>  	struct device *cpu_dev;
> @@ -215,7 +229,7 @@ static int em_create_pd(struct device *dev, int nr_states,
>  			return -ENOMEM;
>  	}
>  
> -	ret = em_create_perf_table(dev, pd, nr_states, cb);
> +	ret = em_create_perf_table(dev, pd, nr_states, cb, flags);
>  	if (ret) {
>  		kfree(pd);
>  		return ret;
> @@ -332,6 +346,7 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
>  				bool milliwatts)
>  {
>  	unsigned long cap, prev_cap = 0;
> +	unsigned long flags = 0;
>  	int cpu, ret;
>  
>  	if (!dev || !nr_states || !cb)
> @@ -378,14 +393,16 @@ int em_dev_register_perf_domain(struct device *dev, unsigned int nr_states,
>  		}
>  	}
>  
> -	ret = em_create_pd(dev, nr_states, cb, cpus);
> +	if (milliwatts)
> +		flags |= EM_PERF_DOMAIN_MILLIWATTS;
> +	else if (cb->get_cost)
> +		flags |= EM_PERF_DOMAIN_ARTIFICIAL;
> +
> +	ret = em_create_pd(dev, nr_states, cb, cpus, flags);
>  	if (ret)
>  		goto unlock;
>  
> -	if (milliwatts)
> -		dev->em_pd->flags |= EM_PERF_DOMAIN_MILLIWATTS;
> -	else if (cb->get_cost)
> -		dev->em_pd->flags |= EM_PERF_DOMAIN_ARTIFICIAL;
> +	dev->em_pd->flags |= flags;
>  

nit: given that there is a designated function to create the PD which
now requires information on flags to initilise the domains properly,
might be better to have dev->em_pd->flags set inside that function,
rather than here.

>  	em_cpufreq_update_efficiencies(dev);
>  

Reviewed-by: Ionela Voinescu <ionela.voinescu@arm.com>

Thanks,
Ionela.

> -- 
> 2.17.1
>