[PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition

Mark Tseng posted 3 patches 12 months ago
[PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Mark Tseng 12 months ago
In mtk_cpufreq_set_target() is re-enter function but the mutex lock
decalre in mtk_cpu_dvfs_info structure for each policy. It should
change to global variable for critical session avoid race condition
with 2 or more policy.

add mtk_cpufreq_get() replace cpufreq_generic_get() and use global lock
avoid return wrong clock.

Signed-off-by: Mark Tseng <chun-jen.tseng@mediatek.com>
---
 drivers/cpufreq/mediatek-cpufreq.c | 39 ++++++++++++++++++++++--------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
index 2656b88db378..07d5958e106a 100644
--- a/drivers/cpufreq/mediatek-cpufreq.c
+++ b/drivers/cpufreq/mediatek-cpufreq.c
@@ -49,8 +49,6 @@ struct mtk_cpu_dvfs_info {
 	bool need_voltage_tracking;
 	int vproc_on_boot;
 	int pre_vproc;
-	/* Avoid race condition for regulators between notify and policy */
-	struct mutex reg_lock;
 	struct notifier_block opp_nb;
 	unsigned int opp_cpu;
 	unsigned long current_freq;
@@ -59,6 +57,9 @@ struct mtk_cpu_dvfs_info {
 	bool ccifreq_bound;
 };
 
+/* Avoid race condition for regulators between notify and policy */
+static DEFINE_MUTEX(mtk_policy_lock);
+
 static struct platform_device *cpufreq_pdev;
 
 static LIST_HEAD(dvfs_info_list);
@@ -209,12 +210,12 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 	long freq_hz, pre_freq_hz;
 	int vproc, pre_vproc, inter_vproc, target_vproc, ret;
 
+	mutex_lock(&mtk_policy_lock);
+
 	inter_vproc = info->intermediate_voltage;
 
 	pre_freq_hz = clk_get_rate(cpu_clk);
 
-	mutex_lock(&info->reg_lock);
-
 	if (unlikely(info->pre_vproc <= 0))
 		pre_vproc = regulator_get_voltage(info->proc_reg);
 	else
@@ -308,7 +309,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 	info->current_freq = freq_hz;
 
 out:
-	mutex_unlock(&info->reg_lock);
+	mutex_unlock(&mtk_policy_lock);
 
 	return ret;
 }
@@ -316,19 +317,20 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
 static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
 				    unsigned long event, void *data)
 {
-	struct dev_pm_opp *opp = data;
+	struct dev_pm_opp *opp;
 	struct dev_pm_opp *new_opp;
 	struct mtk_cpu_dvfs_info *info;
 	unsigned long freq, volt;
 	struct cpufreq_policy *policy;
 	int ret = 0;
 
+	mutex_lock(&mtk_policy_lock);
+	opp = data;
 	info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);
 
 	if (event == OPP_EVENT_ADJUST_VOLTAGE) {
 		freq = dev_pm_opp_get_freq(opp);
 
-		mutex_lock(&info->reg_lock);
 		if (info->current_freq == freq) {
 			volt = dev_pm_opp_get_voltage(opp);
 			ret = mtk_cpufreq_set_voltage(info, volt);
@@ -336,7 +338,6 @@ static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
 				dev_err(info->cpu_dev,
 					"failed to scale voltage: %d\n", ret);
 		}
-		mutex_unlock(&info->reg_lock);
 	} else if (event == OPP_EVENT_DISABLE) {
 		freq = dev_pm_opp_get_freq(opp);
 
@@ -361,6 +362,7 @@ static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
 			}
 		}
 	}
+	mutex_unlock(&mtk_policy_lock);
 
 	return notifier_from_errno(ret);
 }
@@ -495,7 +497,6 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
 	info->intermediate_voltage = dev_pm_opp_get_voltage(opp);
 	dev_pm_opp_put(opp);
 
-	mutex_init(&info->reg_lock);
 	info->current_freq = clk_get_rate(info->cpu_clk);
 
 	info->opp_cpu = cpu;
@@ -607,13 +608,31 @@ static void mtk_cpufreq_exit(struct cpufreq_policy *policy)
 	dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table);
 }
 
+static unsigned int mtk_cpufreq_get(unsigned int cpu)
+{
+	struct mtk_cpu_dvfs_info *info;
+	unsigned long current_freq;
+
+	mutex_lock(&mtk_policy_lock);
+	info = mtk_cpu_dvfs_info_lookup(cpu);
+	if (!info) {
+		mutex_unlock(&mtk_policy_lock);
+		return 0;
+	}
+
+	current_freq = info->current_freq / 1000;
+	mutex_unlock(&mtk_policy_lock);
+
+	return current_freq;
+}
+
 static struct cpufreq_driver mtk_cpufreq_driver = {
 	.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK |
 		 CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
 		 CPUFREQ_IS_COOLING_DEV,
 	.verify = cpufreq_generic_frequency_table_verify,
 	.target_index = mtk_cpufreq_set_target,
-	.get = cpufreq_generic_get,
+	.get = mtk_cpufreq_get,
 	.init = mtk_cpufreq_init,
 	.exit = mtk_cpufreq_exit,
 	.register_em = cpufreq_register_em_with_opp,
-- 
2.45.2
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Dan Carpenter 11 months, 3 weeks ago
Hi Mark,

kernel test robot noticed the following build warnings:

https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/Mark-Tseng/cpufreq-mediatek-using-global-lock-avoid-race-condition/20250214-154521
base:   https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link:    https://lore.kernel.org/r/20250214074353.1169864-2-chun-jen.tseng%40mediatek.com
patch subject: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
config: sparc-randconfig-r071-20250218 (https://download.01.org/0day-ci/archive/20250219/202502190807.fz6fs2jz-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 14.2.0

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202502190807.fz6fs2jz-lkp@intel.com/

smatch warnings:
drivers/cpufreq/mediatek-cpufreq.c:367 mtk_cpufreq_opp_notifier() warn: inconsistent returns 'global &mtk_policy_lock'.

vim +367 drivers/cpufreq/mediatek-cpufreq.c

c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  317  static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  318  				    unsigned long event, void *data)
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  319  {
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-02-14  320  	struct dev_pm_opp *opp;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  321  	struct dev_pm_opp *new_opp;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  322  	struct mtk_cpu_dvfs_info *info;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  323  	unsigned long freq, volt;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  324  	struct cpufreq_policy *policy;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  325  	int ret = 0;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  326  
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-02-14  327  	mutex_lock(&mtk_policy_lock);
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-02-14  328  	opp = data;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  329  	info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  330  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  331  	if (event == OPP_EVENT_ADJUST_VOLTAGE) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  332  		freq = dev_pm_opp_get_freq(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  333  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  334  		if (info->current_freq == freq) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  335  			volt = dev_pm_opp_get_voltage(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  336  			ret = mtk_cpufreq_set_voltage(info, volt);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  337  			if (ret)
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  338  				dev_err(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  339  					"failed to scale voltage: %d\n", ret);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  340  		}
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  341  	} else if (event == OPP_EVENT_DISABLE) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  342  		freq = dev_pm_opp_get_freq(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  343  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  344  		/* case of current opp item is disabled */
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  345  		if (info->current_freq == freq) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  346  			freq = 1;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  347  			new_opp = dev_pm_opp_find_freq_ceil(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  348  							    &freq);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  349  			if (IS_ERR(new_opp)) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  350  				dev_err(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  351  					"all opp items are disabled\n");
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  352  				ret = PTR_ERR(new_opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  353  				return notifier_from_errno(ret);

mutex_unlock(&mtk_policy_lock) before returning.

c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  354  			}
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  355  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  356  			dev_pm_opp_put(new_opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  357  			policy = cpufreq_cpu_get(info->opp_cpu);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  358  			if (policy) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  359  				cpufreq_driver_target(policy, freq / 1000,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  360  						      CPUFREQ_RELATION_L);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  361  				cpufreq_cpu_put(policy);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  362  			}
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  363  		}
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  364  	}
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-02-14  365  	mutex_unlock(&mtk_policy_lock);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  366  
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05 @367  	return notifier_from_errno(ret);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-05-05  368  }

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Chun-Jen Tseng (曾俊仁) 10 months, 3 weeks ago
Hi Dan,

Thanks your remind. I will be careful and fix it before submit new
patch.

BRs,

Mark Tseng


On Wed, 2025-02-19 at 10:23 +0300, Dan Carpenter wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> Hi Mark,
> 
> kernel test robot noticed the following build warnings:
> 
> https://urldefense.com/v3/__https://git-scm.com/docs/git-format-patch*_base_tree_information__;Iw!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jViJAkm7uw$
>  ]
> 
> url:   
> https://urldefense.com/v3/__https://github.com/intel-lab-lkp/linux/commits/Mark-Tseng/cpufreq-mediatek-using-global-lock-avoid-race-condition/20250214-154521__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVj9Egobdw$
> base:  
> https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVg7ujqlQw$
>   linux-next
> patch link:   
> https://lore.kernel.org/r/20250214074353.1169864-2-chun-jen.tseng%40mediatek.com
> patch subject: [PATCH v3 1/3] cpufreq: mediatek: using global lock
> avoid race condition
> config: sparc-randconfig-r071-20250218
> (https://urldefense.com/v3/__https://download.01.org/0day-ci/archive/
> 20250219/202502190807.fz6fs2jz-lkp@intel.com/config__;!!CTRNKA9wMg0AR
> bw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-
> 4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVhUidk_DA$ )
> compiler: sparc64-linux-gcc (GCC) 14.2.0
> 
> If you fix the issue in a separate patch/commit (i.e. not just a new
> version of
> the same patch/commit), kindly add following tags
> > Reported-by: kernel test robot <lkp@intel.com>
> > Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
> > Closes:
> > https://lore.kernel.org/r/202502190807.fz6fs2jz-lkp@intel.com/
> 
> smatch warnings:
> drivers/cpufreq/mediatek-cpufreq.c:367 mtk_cpufreq_opp_notifier()
> warn: inconsistent returns 'global &mtk_policy_lock'.
> 
> vim +367 drivers/cpufreq/mediatek-cpufreq.c
> 
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  317  static int mtk_cpufreq_opp_notifier(struct notifier_block
> *nb,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  318                                     unsigned long event,
> void *data)
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  319  {
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-
> 02-14  320         struct dev_pm_opp *opp;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  321         struct dev_pm_opp *new_opp;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  322         struct mtk_cpu_dvfs_info *info;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  323         unsigned long freq, volt;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  324         struct cpufreq_policy *policy;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  325         int ret = 0;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  326
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-
> 02-14  327         mutex_lock(&mtk_policy_lock);
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-
> 02-14  328         opp = data;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  329         info = container_of(nb, struct mtk_cpu_dvfs_info,
> opp_nb);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  330
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  331         if (event == OPP_EVENT_ADJUST_VOLTAGE) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  332                 freq = dev_pm_opp_get_freq(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  333
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  334                 if (info->current_freq == freq) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  335                         volt =
> dev_pm_opp_get_voltage(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  336                         ret =
> mtk_cpufreq_set_voltage(info, volt);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  337                         if (ret)
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  338                                 dev_err(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  339                                         "failed to scale
> voltage: %d\n", ret);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  340                 }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  341         } else if (event == OPP_EVENT_DISABLE) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  342                 freq = dev_pm_opp_get_freq(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  343
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  344                 /* case of current opp item is disabled */
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  345                 if (info->current_freq == freq) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  346                         freq = 1;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  347                         new_opp =
> dev_pm_opp_find_freq_ceil(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05 
> 348                                                            
> &freq);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  349                         if (IS_ERR(new_opp)) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  350                                 dev_err(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  351                                         "all opp items are
> disabled\n");
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  352                                 ret = PTR_ERR(new_opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  353                                 return
> notifier_from_errno(ret);
> 
> mutex_unlock(&mtk_policy_lock) before returning.
> 
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  354                         }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  355
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  356                         dev_pm_opp_put(new_opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  357                         policy = cpufreq_cpu_get(info-
> >opp_cpu);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  358                         if (policy) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  359                                
> cpufreq_driver_target(policy, freq / 1000,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  360                                                      
> CPUFREQ_RELATION_L);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  361                                 cpufreq_cpu_put(policy);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  362                         }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  363                 }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  364         }
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng    2025-
> 02-14  365         mutex_unlock(&mtk_policy_lock);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  366
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05 @367         return notifier_from_errno(ret);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen   2022-
> 05-05  368  }
> 
> --
> 0-DAY CI Kernel Test Service
> https://urldefense.com/v3/__https://github.com/intel/lkp-tests/wiki__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVii1cx6tQ$
> 

Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Viresh Kumar 11 months, 3 weeks ago
On 14-02-25, 15:43, Mark Tseng wrote:
> In mtk_cpufreq_set_target() is re-enter function but the mutex lock
> decalre in mtk_cpu_dvfs_info structure for each policy. It should
> change to global variable for critical session avoid race condition
> with 2 or more policy.

And what exactly is the race condition here ? Can you please explain that ?
Since the struct mtk_cpu_dvfs_info instance is per-policy, I don't think there
is any race here.

The lock was introduced earlier to avoid a potential race with notifiers, but it
has nothing to do with calling target simultaneously.

commit c210063b40ac ("cpufreq: mediatek: Add opp notification support")

-- 
viresh
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Chun-Jen Tseng (曾俊仁) 10 months, 3 weeks ago
Hi viresh,

Thanks your review and reply.

The struct mtk_cpu_dvfs_info instance is per-policy and the reg_lock is
also in this structure. when I have two "policy-0" and "policy-6" use
the same mtk_cpufreq_set_target() function but the info->reg_lock 
is from 2 instance(policy-0 and policy-6). when the policy-0 and
policy-6 call set_target target, the mutex_lock is locked by per-
policy. So, I change to global lock avoid per-policy lock.

BRs,

Mark Tseng

On Wed, 2025-02-19 at 11:12 +0530, Viresh Kumar wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> On 14-02-25, 15:43, Mark Tseng wrote:
> > In mtk_cpufreq_set_target() is re-enter function but the mutex lock
> > decalre in mtk_cpu_dvfs_info structure for each policy. It should
> > change to global variable for critical session avoid race condition
> > with 2 or more policy.
> 
> And what exactly is the race condition here ? Can you please explain
> that ?
> Since the struct mtk_cpu_dvfs_info instance is per-policy, I don't
> think there
> is any race here.
> 
> The lock was introduced earlier to avoid a potential race with
> notifiers, but it
> has nothing to do with calling target simultaneously.
> 
> commit c210063b40ac ("cpufreq: mediatek: Add opp notification
> support")
> 
> --
> viresh
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Viresh Kumar 10 months, 3 weeks ago
On 20-03-25, 08:22, Chun-Jen Tseng (曾俊仁) wrote:
> The struct mtk_cpu_dvfs_info instance is per-policy and the reg_lock is
> also in this structure. when I have two "policy-0" and "policy-6" use
> the same mtk_cpufreq_set_target() function but the info->reg_lock 
> is from 2 instance(policy-0 and policy-6). when the policy-0 and
> policy-6 call set_target target, the mutex_lock is locked by per-
> policy. So, I change to global lock avoid per-policy lock.

Yes, that's what you are doing. I am asking why a global lock is required here ?
I think the per-policy lock is all you need.

-- 
viresh
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Chun-Jen Tseng (曾俊仁) 10 months, 3 weeks ago
Hi viresh,

I add a global lock related to the CCI driver.

This is because the CCI needs to obtain the frequencies of policy-0 and
policy-6 to determine its own frequency.

If policy-0 and policy-6 are set simultaneously, it may cause the CCI
to select the wrong frequency.

Therefore, I hope to change the setting flow to the following:
   policy-0 or policy-6 -> set frequency -> CCI receives notification -
> set CCI frequency

BRs,

Mark Tseng

On Fri, 2025-03-21 at 10:26 +0530, Viresh Kumar wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> On 20-03-25, 08:22, Chun-Jen Tseng (曾俊仁) wrote:
> > The struct mtk_cpu_dvfs_info instance is per-policy and the
> > reg_lock is
> > also in this structure. when I have two "policy-0" and "policy-6"
> > use
> > the same mtk_cpufreq_set_target() function but the info->reg_lock
> > is from 2 instance(policy-0 and policy-6). when the policy-0 and
> > policy-6 call set_target target, the mutex_lock is locked by per-
> > policy. So, I change to global lock avoid per-policy lock.
> 
> Yes, that's what you are doing. I am asking why a global lock is
> required here ?
> I think the per-policy lock is all you need.
> 
> --
> viresh

Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Viresh Kumar 10 months, 3 weeks ago
On 21-03-25, 05:32, Chun-Jen Tseng (曾俊仁) wrote:
> I add a global lock related to the CCI driver.
> 
> This is because the CCI needs to obtain the frequencies of policy-0 and
> policy-6 to determine its own frequency.
> 
> If policy-0 and policy-6 are set simultaneously, it may cause the CCI
> to select the wrong frequency.
> 
> Therefore, I hope to change the setting flow to the following:
>    policy-0 or policy-6 -> set frequency -> CCI receives notification -
> set CCI frequency

Can you please point to the code where this race exists ? I am not
sure I fully understand it as of now. If the race is present in
another driver, CCI ?, then shouldn't it be fixed there instead ?

-- 
viresh
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Chun-Jen Tseng (曾俊仁) 10 months, 3 weeks ago
Hi viresh,

I think the best configuration sequence is as follows:
  cpufreq policy -> set frequency -> CCI governor get
CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency

However, in drivers/devfreq/governor_passive.c#L77,
get_target_freq_with_cpufreq() retrieves the current frequency of each
policy,
and it determines the CCI frequency based on the frequency of each
policy.

But if policy-0 and policy-6 enter simultaneously, the CCI governor
might get an incorrect frequency.

cpufreq policy-0 -> set frequency -> CCI governor get
CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency
=> during this time, the CCI governor gets policy-0 and policy-6, BUT
policy-6 may change frequency by cpufreq driver at the same time.

Therefore, I want to change it so that only one policy can perform the
set frequency action at a time to avoid CCI running at the wrong
frequency.

BRs,

Mark Tseng

On Fri, 2025-03-21 at 11:31 +0530, Viresh Kumar wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> On 21-03-25, 05:32, Chun-Jen Tseng (曾俊仁) wrote:
> > I add a global lock related to the CCI driver.
> > 
> > This is because the CCI needs to obtain the frequencies of policy-0
> > and
> > policy-6 to determine its own frequency.
> > 
> > If policy-0 and policy-6 are set simultaneously, it may cause the
> > CCI
> > to select the wrong frequency.
> > 
> > Therefore, I hope to change the setting flow to the following:
> >    policy-0 or policy-6 -> set frequency -> CCI receives
> > notification -
> > set CCI frequency
> 
> Can you please point to the code where this race exists ? I am not
> sure I fully understand it as of now. If the race is present in
> another driver, CCI ?, then shouldn't it be fixed there instead ?
> 
> --
> viresh

Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Viresh Kumar 10 months, 3 weeks ago
Hi,

Thanks for sharing the details this time, it makes it much clearer
now.

On 24-03-25, 03:21, Chun-Jen Tseng (曾俊仁) wrote:
> I think the best configuration sequence is as follows:
>   cpufreq policy -> set frequency -> CCI governor get
> CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency
> 
> However, in drivers/devfreq/governor_passive.c#L77,
> get_target_freq_with_cpufreq() retrieves the current frequency of each
> policy,
> and it determines the CCI frequency based on the frequency of each
> policy.
> 
> But if policy-0 and policy-6 enter simultaneously, the CCI governor
> might get an incorrect frequency.

Yes it may fetch the current frequency (or last known one), but that
shouldn't be a problem as the postchange notification for policy-6
should get called right after and should fix the issue. Right ?

I don't think this is a race and if this requires fixing. clk_get()
for any device, will always return the last configured value, while
the clock might be changing at the same time.

What's important is that you don't get an incorrect frequency (as in
based on intermediate values of registers, etc). Note that the last
configured frequency isn't an incorrect frequency.

> cpufreq policy-0 -> set frequency -> CCI governor get
> CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency
> => during this time, the CCI governor gets policy-0 and policy-6, BUT
> policy-6 may change frequency by cpufreq driver at the same time.

Sure, and I don't see a problem with that. The issue is there only if
we can reach a state where CCI is left configured in the wrong state.
Which I don't think would happen here as the postchange notifier will
get called again, forcing a switch of frequency again.

-- 
viresh
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Chun-Jen Tseng (曾俊仁) 10 months ago
Hi Viresh,

The CCI level choose by Max_Level(LCPU & BCPU frequency) in devfreq
driver.
without global lock, It may choose wrong CCI level and cause system
stall.

I hope this flow is serial setting like, BCPU / LCPU set frequency ->
set CCI level -> BCPU / LCPU set frequency -> set CCI level -> ......

without global lock, it could be LCPU / BCPU set frequency -> set CCI
level(during this time, it may change BCPU / LCPU frequency and cause
system stall.

I also can only do global lock on ccifreq_support SoC.

BRs,

Mark Tseng

On Mon, 2025-03-24 at 11:13 +0530, Viresh Kumar wrote:
> 
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
> 
> 
> Hi,
> 
> Thanks for sharing the details this time, it makes it much clearer
> now.
> 
> On 24-03-25, 03:21, Chun-Jen Tseng (曾俊仁) wrote:
> > I think the best configuration sequence is as follows:
> >   cpufreq policy -> set frequency -> CCI governor get
> > CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency
> > 
> > However, in drivers/devfreq/governor_passive.c#L77,
> > get_target_freq_with_cpufreq() retrieves the current frequency of
> > each
> > policy,
> > and it determines the CCI frequency based on the frequency of each
> > policy.
> > 
> > But if policy-0 and policy-6 enter simultaneously, the CCI governor
> > might get an incorrect frequency.
> 
> Yes it may fetch the current frequency (or last known one), but that
> shouldn't be a problem as the postchange notification for policy-6
> should get called right after and should fix the issue. Right ?
> 
> I don't think this is a race and if this requires fixing. clk_get()
> for any device, will always return the last configured value, while
> the clock might be changing at the same time.
> 
> What's important is that you don't get an incorrect frequency (as in
> based on intermediate values of registers, etc). Note that the last
> configured frequency isn't an incorrect frequency.
> 
> > cpufreq policy-0 -> set frequency -> CCI governor get
> > CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency
> > => during this time, the CCI governor gets policy-0 and policy-6,
> > BUT
> > policy-6 may change frequency by cpufreq driver at the same time.
> 
> Sure, and I don't see a problem with that. The issue is there only if
> we can reach a state where CCI is left configured in the wrong state.
> Which I don't think would happen here as the postchange notifier will
> get called again, forcing a switch of frequency again.
> 
> --
> viresh

Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Viresh Kumar 9 months, 4 weeks ago
On 14-04-25, 08:42, Chun-Jen Tseng (曾俊仁) wrote:
> Hi Viresh,
> 
> The CCI level choose by Max_Level(LCPU & BCPU frequency) in devfreq
> driver.
> without global lock, It may choose wrong CCI level and cause system
> stall.
> 
> I hope this flow is serial setting like, BCPU / LCPU set frequency ->
> set CCI level -> BCPU / LCPU set frequency -> set CCI level -> ......
> 
> without global lock, it could be LCPU / BCPU set frequency -> set CCI
> level(during this time, it may change BCPU / LCPU frequency and cause
> system stall.
> 
> I also can only do global lock on ccifreq_support SoC.

As explained earlier, I don't think there is a race here. May be I am
wrong. And so I need a clear code path example from you, which proves
that there is a race here.

-- 
viresh
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Chen-Yu Tsai 5 months, 2 weeks ago
On Wed, Apr 16, 2025 at 4:07 PM Viresh Kumar <viresh.kumar@linaro.org> wrote:
>
> On 14-04-25, 08:42, Chun-Jen Tseng (曾俊仁) wrote:
> > Hi Viresh,
> >
> > The CCI level choose by Max_Level(LCPU & BCPU frequency) in devfreq
> > driver.
> > without global lock, It may choose wrong CCI level and cause system
> > stall.
> >
> > I hope this flow is serial setting like, BCPU / LCPU set frequency ->
> > set CCI level -> BCPU / LCPU set frequency -> set CCI level -> ......
> >
> > without global lock, it could be LCPU / BCPU set frequency -> set CCI
> > level(during this time, it may change BCPU / LCPU frequency and cause
> > system stall.
> >
> > I also can only do global lock on ccifreq_support SoC.
>
> As explained earlier, I don't think there is a race here. May be I am
> wrong. And so I need a clear code path example from you, which proves
> that there is a race here.

Maybe a different set of eyes will help. I talked to Chun-Jen offline,
and I'll try to explain what I understand.

First of all, the issue lies not in cpufreq, but in the CCI devfreq,
and how the passive devfreq governor is linked to cpufreq.

The CCI hardware unit on the MT8186 is sensitive to frequency changes.
If the performance level of the CCI unit is much lower than either
of the CPU clusters, it  will hard hang the whole system. So the CCI
devfreq must always take into account the performance level of both
clusters, or in other words the settings of both cpufreq policies.

Since the cpufreq policies only serialize with themselves, it is possible
for one policy to change and trigger a devfreq update, and when the
CCI devfreq driver is doing its calculations, the other policy changes
and causes a big deviation from the assumed performance levels, leaving the
CCI into a non-matching performance level and causing a system hang.

So I think we need to handle CPUFREQ_PRECHANGE events for the frequency
increase direction, as well as enlarging the devfreq mutex to cover
the CPU frequency tracking bits in the passive governor.

I hope that makes sense.


Thanks
ChenYu
Re: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
Posted by Viresh Kumar 5 months, 2 weeks ago
On 28-08-25, 15:26, Chen-Yu Tsai wrote:
> Maybe a different set of eyes will help. I talked to Chun-Jen offline,
> and I'll try to explain what I understand.
> 
> First of all, the issue lies not in cpufreq, but in the CCI devfreq,
> and how the passive devfreq governor is linked to cpufreq.
> 
> The CCI hardware unit on the MT8186 is sensitive to frequency changes.
> If the performance level of the CCI unit is much lower than either
> of the CPU clusters, it  will hard hang the whole system. So the CCI
> devfreq must always take into account the performance level of both
> clusters, or in other words the settings of both cpufreq policies.
> 
> Since the cpufreq policies only serialize with themselves, it is possible
> for one policy to change and trigger a devfreq update, and when the
> CCI devfreq driver is doing its calculations, the other policy changes
> and causes a big deviation from the assumed performance levels, leaving the
> CCI into a non-matching performance level and causing a system hang.
> 
> So I think we need to handle CPUFREQ_PRECHANGE events for the frequency
> increase direction, as well as enlarging the devfreq mutex to cover
> the CPU frequency tracking bits in the passive governor.
> 
> I hope that makes sense.

If some sort of serialization is required in the CCI driver, then a
lock must be present there to prevent the issues.

-- 
viresh