In mtk_cpufreq_set_target() is re-enter function but the mutex lock
decalre in mtk_cpu_dvfs_info structure for each policy. It should
change to global variable for critical session avoid race condition
with 2 or more policy.
add mtk_cpufreq_get() replace cpufreq_generic_get() and use global lock
avoid return wrong clock.
Signed-off-by: Mark Tseng <chun-jen.tseng@mediatek.com>
---
drivers/cpufreq/mediatek-cpufreq.c | 39 ++++++++++++++++++++++--------
1 file changed, 29 insertions(+), 10 deletions(-)
diff --git a/drivers/cpufreq/mediatek-cpufreq.c b/drivers/cpufreq/mediatek-cpufreq.c
index 2656b88db378..07d5958e106a 100644
--- a/drivers/cpufreq/mediatek-cpufreq.c
+++ b/drivers/cpufreq/mediatek-cpufreq.c
@@ -49,8 +49,6 @@ struct mtk_cpu_dvfs_info {
bool need_voltage_tracking;
int vproc_on_boot;
int pre_vproc;
- /* Avoid race condition for regulators between notify and policy */
- struct mutex reg_lock;
struct notifier_block opp_nb;
unsigned int opp_cpu;
unsigned long current_freq;
@@ -59,6 +57,9 @@ struct mtk_cpu_dvfs_info {
bool ccifreq_bound;
};
+/* Avoid race condition for regulators between notify and policy */
+static DEFINE_MUTEX(mtk_policy_lock);
+
static struct platform_device *cpufreq_pdev;
static LIST_HEAD(dvfs_info_list);
@@ -209,12 +210,12 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
long freq_hz, pre_freq_hz;
int vproc, pre_vproc, inter_vproc, target_vproc, ret;
+ mutex_lock(&mtk_policy_lock);
+
inter_vproc = info->intermediate_voltage;
pre_freq_hz = clk_get_rate(cpu_clk);
- mutex_lock(&info->reg_lock);
-
if (unlikely(info->pre_vproc <= 0))
pre_vproc = regulator_get_voltage(info->proc_reg);
else
@@ -308,7 +309,7 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
info->current_freq = freq_hz;
out:
- mutex_unlock(&info->reg_lock);
+ mutex_unlock(&mtk_policy_lock);
return ret;
}
@@ -316,19 +317,20 @@ static int mtk_cpufreq_set_target(struct cpufreq_policy *policy,
static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
unsigned long event, void *data)
{
- struct dev_pm_opp *opp = data;
+ struct dev_pm_opp *opp;
struct dev_pm_opp *new_opp;
struct mtk_cpu_dvfs_info *info;
unsigned long freq, volt;
struct cpufreq_policy *policy;
int ret = 0;
+ mutex_lock(&mtk_policy_lock);
+ opp = data;
info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);
if (event == OPP_EVENT_ADJUST_VOLTAGE) {
freq = dev_pm_opp_get_freq(opp);
- mutex_lock(&info->reg_lock);
if (info->current_freq == freq) {
volt = dev_pm_opp_get_voltage(opp);
ret = mtk_cpufreq_set_voltage(info, volt);
@@ -336,7 +338,6 @@ static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
dev_err(info->cpu_dev,
"failed to scale voltage: %d\n", ret);
}
- mutex_unlock(&info->reg_lock);
} else if (event == OPP_EVENT_DISABLE) {
freq = dev_pm_opp_get_freq(opp);
@@ -361,6 +362,7 @@ static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
}
}
}
+ mutex_unlock(&mtk_policy_lock);
return notifier_from_errno(ret);
}
@@ -495,7 +497,6 @@ static int mtk_cpu_dvfs_info_init(struct mtk_cpu_dvfs_info *info, int cpu)
info->intermediate_voltage = dev_pm_opp_get_voltage(opp);
dev_pm_opp_put(opp);
- mutex_init(&info->reg_lock);
info->current_freq = clk_get_rate(info->cpu_clk);
info->opp_cpu = cpu;
@@ -607,13 +608,31 @@ static void mtk_cpufreq_exit(struct cpufreq_policy *policy)
dev_pm_opp_free_cpufreq_table(info->cpu_dev, &policy->freq_table);
}
+static unsigned int mtk_cpufreq_get(unsigned int cpu)
+{
+ struct mtk_cpu_dvfs_info *info;
+ unsigned long current_freq;
+
+ mutex_lock(&mtk_policy_lock);
+ info = mtk_cpu_dvfs_info_lookup(cpu);
+ if (!info) {
+ mutex_unlock(&mtk_policy_lock);
+ return 0;
+ }
+
+ current_freq = info->current_freq / 1000;
+ mutex_unlock(&mtk_policy_lock);
+
+ return current_freq;
+}
+
static struct cpufreq_driver mtk_cpufreq_driver = {
.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK |
CPUFREQ_HAVE_GOVERNOR_PER_POLICY |
CPUFREQ_IS_COOLING_DEV,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = mtk_cpufreq_set_target,
- .get = cpufreq_generic_get,
+ .get = mtk_cpufreq_get,
.init = mtk_cpufreq_init,
.exit = mtk_cpufreq_exit,
.register_em = cpufreq_register_em_with_opp,
--
2.45.2
Hi Mark,
kernel test robot noticed the following build warnings:
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Mark-Tseng/cpufreq-mediatek-using-global-lock-avoid-race-condition/20250214-154521
base: https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git linux-next
patch link: https://lore.kernel.org/r/20250214074353.1169864-2-chun-jen.tseng%40mediatek.com
patch subject: [PATCH v3 1/3] cpufreq: mediatek: using global lock avoid race condition
config: sparc-randconfig-r071-20250218 (https://download.01.org/0day-ci/archive/20250219/202502190807.fz6fs2jz-lkp@intel.com/config)
compiler: sparc64-linux-gcc (GCC) 14.2.0
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
| Closes: https://lore.kernel.org/r/202502190807.fz6fs2jz-lkp@intel.com/
smatch warnings:
drivers/cpufreq/mediatek-cpufreq.c:367 mtk_cpufreq_opp_notifier() warn: inconsistent returns 'global &mtk_policy_lock'.
vim +367 drivers/cpufreq/mediatek-cpufreq.c
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 317 static int mtk_cpufreq_opp_notifier(struct notifier_block *nb,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 318 unsigned long event, void *data)
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 319 {
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng 2025-02-14 320 struct dev_pm_opp *opp;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 321 struct dev_pm_opp *new_opp;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 322 struct mtk_cpu_dvfs_info *info;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 323 unsigned long freq, volt;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 324 struct cpufreq_policy *policy;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 325 int ret = 0;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 326
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng 2025-02-14 327 mutex_lock(&mtk_policy_lock);
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng 2025-02-14 328 opp = data;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 329 info = container_of(nb, struct mtk_cpu_dvfs_info, opp_nb);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 330
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 331 if (event == OPP_EVENT_ADJUST_VOLTAGE) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 332 freq = dev_pm_opp_get_freq(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 333
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 334 if (info->current_freq == freq) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 335 volt = dev_pm_opp_get_voltage(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 336 ret = mtk_cpufreq_set_voltage(info, volt);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 337 if (ret)
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 338 dev_err(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 339 "failed to scale voltage: %d\n", ret);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 340 }
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 341 } else if (event == OPP_EVENT_DISABLE) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 342 freq = dev_pm_opp_get_freq(opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 343
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 344 /* case of current opp item is disabled */
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 345 if (info->current_freq == freq) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 346 freq = 1;
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 347 new_opp = dev_pm_opp_find_freq_ceil(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 348 &freq);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 349 if (IS_ERR(new_opp)) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 350 dev_err(info->cpu_dev,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 351 "all opp items are disabled\n");
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 352 ret = PTR_ERR(new_opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 353 return notifier_from_errno(ret);
mutex_unlock(&mtk_policy_lock) before returning.
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 354 }
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 355
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 356 dev_pm_opp_put(new_opp);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 357 policy = cpufreq_cpu_get(info->opp_cpu);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 358 if (policy) {
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 359 cpufreq_driver_target(policy, freq / 1000,
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 360 CPUFREQ_RELATION_L);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 361 cpufreq_cpu_put(policy);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 362 }
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 363 }
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 364 }
5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng 2025-02-14 365 mutex_unlock(&mtk_policy_lock);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 366
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 @367 return notifier_from_errno(ret);
c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-05-05 368 }
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Hi Dan,
Thanks your remind. I will be careful and fix it before submit new
patch.
BRs,
Mark Tseng
On Wed, 2025-02-19 at 10:23 +0300, Dan Carpenter wrote:
>
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>
>
> Hi Mark,
>
> kernel test robot noticed the following build warnings:
>
> https://urldefense.com/v3/__https://git-scm.com/docs/git-format-patch*_base_tree_information__;Iw!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jViJAkm7uw$
> ]
>
> url:
> https://urldefense.com/v3/__https://github.com/intel-lab-lkp/linux/commits/Mark-Tseng/cpufreq-mediatek-using-global-lock-avoid-race-condition/20250214-154521__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVj9Egobdw$
> base:
> https://urldefense.com/v3/__https://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVg7ujqlQw$
> linux-next
> patch link:
> https://lore.kernel.org/r/20250214074353.1169864-2-chun-jen.tseng%40mediatek.com
> patch subject: [PATCH v3 1/3] cpufreq: mediatek: using global lock
> avoid race condition
> config: sparc-randconfig-r071-20250218
> (https://urldefense.com/v3/__https://download.01.org/0day-ci/archive/
> 20250219/202502190807.fz6fs2jz-lkp@intel.com/config__;!!CTRNKA9wMg0AR
> bw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-
> 4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVhUidk_DA$ )
> compiler: sparc64-linux-gcc (GCC) 14.2.0
>
> If you fix the issue in a separate patch/commit (i.e. not just a new
> version of
> the same patch/commit), kindly add following tags
> > Reported-by: kernel test robot <lkp@intel.com>
> > Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
> > Closes:
> > https://lore.kernel.org/r/202502190807.fz6fs2jz-lkp@intel.com/
>
> smatch warnings:
> drivers/cpufreq/mediatek-cpufreq.c:367 mtk_cpufreq_opp_notifier()
> warn: inconsistent returns 'global &mtk_policy_lock'.
>
> vim +367 drivers/cpufreq/mediatek-cpufreq.c
>
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 317 static int mtk_cpufreq_opp_notifier(struct notifier_block
> *nb,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 318 unsigned long event,
> void *data)
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 319 {
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng 2025-
> 02-14 320 struct dev_pm_opp *opp;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 321 struct dev_pm_opp *new_opp;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 322 struct mtk_cpu_dvfs_info *info;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 323 unsigned long freq, volt;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 324 struct cpufreq_policy *policy;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 325 int ret = 0;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 326
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng 2025-
> 02-14 327 mutex_lock(&mtk_policy_lock);
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng 2025-
> 02-14 328 opp = data;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 329 info = container_of(nb, struct mtk_cpu_dvfs_info,
> opp_nb);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 330
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 331 if (event == OPP_EVENT_ADJUST_VOLTAGE) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 332 freq = dev_pm_opp_get_freq(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 333
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 334 if (info->current_freq == freq) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 335 volt =
> dev_pm_opp_get_voltage(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 336 ret =
> mtk_cpufreq_set_voltage(info, volt);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 337 if (ret)
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 338 dev_err(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 339 "failed to scale
> voltage: %d\n", ret);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 340 }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 341 } else if (event == OPP_EVENT_DISABLE) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 342 freq = dev_pm_opp_get_freq(opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 343
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 344 /* case of current opp item is disabled */
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 345 if (info->current_freq == freq) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 346 freq = 1;
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 347 new_opp =
> dev_pm_opp_find_freq_ceil(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05
> 348
> &freq);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 349 if (IS_ERR(new_opp)) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 350 dev_err(info->cpu_dev,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 351 "all opp items are
> disabled\n");
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 352 ret = PTR_ERR(new_opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 353 return
> notifier_from_errno(ret);
>
> mutex_unlock(&mtk_policy_lock) before returning.
>
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 354 }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 355
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 356 dev_pm_opp_put(new_opp);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 357 policy = cpufreq_cpu_get(info-
> >opp_cpu);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 358 if (policy) {
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 359
> cpufreq_driver_target(policy, freq / 1000,
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 360
> CPUFREQ_RELATION_L);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 361 cpufreq_cpu_put(policy);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 362 }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 363 }
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 364 }
> 5f81d7eeae239d drivers/cpufreq/mediatek-cpufreq.c Mark Tseng 2025-
> 02-14 365 mutex_unlock(&mtk_policy_lock);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 366
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 @367 return notifier_from_errno(ret);
> c210063b40acab drivers/cpufreq/mediatek-cpufreq.c Rex-BC Chen 2022-
> 05-05 368 }
>
> --
> 0-DAY CI Kernel Test Service
> https://urldefense.com/v3/__https://github.com/intel/lkp-tests/wiki__;!!CTRNKA9wMg0ARbw!ixPI3aPvvJSSIiRSm4TttWZyeSNnZsdzX-4zYkY7Ax1faRLZBJ7JW3Fmf7O8BYXiyHlxDC6aauOslVt8KEW_jVii1cx6tQ$
>
On 14-02-25, 15:43, Mark Tseng wrote:
> In mtk_cpufreq_set_target() is re-enter function but the mutex lock
> decalre in mtk_cpu_dvfs_info structure for each policy. It should
> change to global variable for critical session avoid race condition
> with 2 or more policy.
And what exactly is the race condition here ? Can you please explain that ?
Since the struct mtk_cpu_dvfs_info instance is per-policy, I don't think there
is any race here.
The lock was introduced earlier to avoid a potential race with notifiers, but it
has nothing to do with calling target simultaneously.
commit c210063b40ac ("cpufreq: mediatek: Add opp notification support")
--
viresh
Hi viresh,
Thanks your review and reply.
The struct mtk_cpu_dvfs_info instance is per-policy and the reg_lock is
also in this structure. when I have two "policy-0" and "policy-6" use
the same mtk_cpufreq_set_target() function but the info->reg_lock
is from 2 instance(policy-0 and policy-6). when the policy-0 and
policy-6 call set_target target, the mutex_lock is locked by per-
policy. So, I change to global lock avoid per-policy lock.
BRs,
Mark Tseng
On Wed, 2025-02-19 at 11:12 +0530, Viresh Kumar wrote:
>
> External email : Please do not click links or open attachments until
> you have verified the sender or the content.
>
>
> On 14-02-25, 15:43, Mark Tseng wrote:
> > In mtk_cpufreq_set_target() is re-enter function but the mutex lock
> > decalre in mtk_cpu_dvfs_info structure for each policy. It should
> > change to global variable for critical session avoid race condition
> > with 2 or more policy.
>
> And what exactly is the race condition here ? Can you please explain
> that ?
> Since the struct mtk_cpu_dvfs_info instance is per-policy, I don't
> think there
> is any race here.
>
> The lock was introduced earlier to avoid a potential race with
> notifiers, but it
> has nothing to do with calling target simultaneously.
>
> commit c210063b40ac ("cpufreq: mediatek: Add opp notification
> support")
>
> --
> viresh
On 20-03-25, 08:22, Chun-Jen Tseng (曾俊仁) wrote: > The struct mtk_cpu_dvfs_info instance is per-policy and the reg_lock is > also in this structure. when I have two "policy-0" and "policy-6" use > the same mtk_cpufreq_set_target() function but the info->reg_lock > is from 2 instance(policy-0 and policy-6). when the policy-0 and > policy-6 call set_target target, the mutex_lock is locked by per- > policy. So, I change to global lock avoid per-policy lock. Yes, that's what you are doing. I am asking why a global lock is required here ? I think the per-policy lock is all you need. -- viresh
Hi viresh, I add a global lock related to the CCI driver. This is because the CCI needs to obtain the frequencies of policy-0 and policy-6 to determine its own frequency. If policy-0 and policy-6 are set simultaneously, it may cause the CCI to select the wrong frequency. Therefore, I hope to change the setting flow to the following: policy-0 or policy-6 -> set frequency -> CCI receives notification - > set CCI frequency BRs, Mark Tseng On Fri, 2025-03-21 at 10:26 +0530, Viresh Kumar wrote: > > External email : Please do not click links or open attachments until > you have verified the sender or the content. > > > On 20-03-25, 08:22, Chun-Jen Tseng (曾俊仁) wrote: > > The struct mtk_cpu_dvfs_info instance is per-policy and the > > reg_lock is > > also in this structure. when I have two "policy-0" and "policy-6" > > use > > the same mtk_cpufreq_set_target() function but the info->reg_lock > > is from 2 instance(policy-0 and policy-6). when the policy-0 and > > policy-6 call set_target target, the mutex_lock is locked by per- > > policy. So, I change to global lock avoid per-policy lock. > > Yes, that's what you are doing. I am asking why a global lock is > required here ? > I think the per-policy lock is all you need. > > -- > viresh
On 21-03-25, 05:32, Chun-Jen Tseng (曾俊仁) wrote: > I add a global lock related to the CCI driver. > > This is because the CCI needs to obtain the frequencies of policy-0 and > policy-6 to determine its own frequency. > > If policy-0 and policy-6 are set simultaneously, it may cause the CCI > to select the wrong frequency. > > Therefore, I hope to change the setting flow to the following: > policy-0 or policy-6 -> set frequency -> CCI receives notification - > set CCI frequency Can you please point to the code where this race exists ? I am not sure I fully understand it as of now. If the race is present in another driver, CCI ?, then shouldn't it be fixed there instead ? -- viresh
Hi viresh, I think the best configuration sequence is as follows: cpufreq policy -> set frequency -> CCI governor get CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency However, in drivers/devfreq/governor_passive.c#L77, get_target_freq_with_cpufreq() retrieves the current frequency of each policy, and it determines the CCI frequency based on the frequency of each policy. But if policy-0 and policy-6 enter simultaneously, the CCI governor might get an incorrect frequency. cpufreq policy-0 -> set frequency -> CCI governor get CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency => during this time, the CCI governor gets policy-0 and policy-6, BUT policy-6 may change frequency by cpufreq driver at the same time. Therefore, I want to change it so that only one policy can perform the set frequency action at a time to avoid CCI running at the wrong frequency. BRs, Mark Tseng On Fri, 2025-03-21 at 11:31 +0530, Viresh Kumar wrote: > > External email : Please do not click links or open attachments until > you have verified the sender or the content. > > > On 21-03-25, 05:32, Chun-Jen Tseng (曾俊仁) wrote: > > I add a global lock related to the CCI driver. > > > > This is because the CCI needs to obtain the frequencies of policy-0 > > and > > policy-6 to determine its own frequency. > > > > If policy-0 and policy-6 are set simultaneously, it may cause the > > CCI > > to select the wrong frequency. > > > > Therefore, I hope to change the setting flow to the following: > > policy-0 or policy-6 -> set frequency -> CCI receives > > notification - > > set CCI frequency > > Can you please point to the code where this race exists ? I am not > sure I fully understand it as of now. If the race is present in > another driver, CCI ?, then shouldn't it be fixed there instead ? > > -- > viresh
Hi, Thanks for sharing the details this time, it makes it much clearer now. On 24-03-25, 03:21, Chun-Jen Tseng (曾俊仁) wrote: > I think the best configuration sequence is as follows: > cpufreq policy -> set frequency -> CCI governor get > CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency > > However, in drivers/devfreq/governor_passive.c#L77, > get_target_freq_with_cpufreq() retrieves the current frequency of each > policy, > and it determines the CCI frequency based on the frequency of each > policy. > > But if policy-0 and policy-6 enter simultaneously, the CCI governor > might get an incorrect frequency. Yes it may fetch the current frequency (or last known one), but that shouldn't be a problem as the postchange notification for policy-6 should get called right after and should fix the issue. Right ? I don't think this is a race and if this requires fixing. clk_get() for any device, will always return the last configured value, while the clock might be changing at the same time. What's important is that you don't get an incorrect frequency (as in based on intermediate values of registers, etc). Note that the last configured frequency isn't an incorrect frequency. > cpufreq policy-0 -> set frequency -> CCI governor get > CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency > => during this time, the CCI governor gets policy-0 and policy-6, BUT > policy-6 may change frequency by cpufreq driver at the same time. Sure, and I don't see a problem with that. The issue is there only if we can reach a state where CCI is left configured in the wrong state. Which I don't think would happen here as the postchange notifier will get called again, forcing a switch of frequency again. -- viresh
Hi Viresh, The CCI level choose by Max_Level(LCPU & BCPU frequency) in devfreq driver. without global lock, It may choose wrong CCI level and cause system stall. I hope this flow is serial setting like, BCPU / LCPU set frequency -> set CCI level -> BCPU / LCPU set frequency -> set CCI level -> ...... without global lock, it could be LCPU / BCPU set frequency -> set CCI level(during this time, it may change BCPU / LCPU frequency and cause system stall. I also can only do global lock on ccifreq_support SoC. BRs, Mark Tseng On Mon, 2025-03-24 at 11:13 +0530, Viresh Kumar wrote: > > External email : Please do not click links or open attachments until > you have verified the sender or the content. > > > Hi, > > Thanks for sharing the details this time, it makes it much clearer > now. > > On 24-03-25, 03:21, Chun-Jen Tseng (曾俊仁) wrote: > > I think the best configuration sequence is as follows: > > cpufreq policy -> set frequency -> CCI governor get > > CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency > > > > However, in drivers/devfreq/governor_passive.c#L77, > > get_target_freq_with_cpufreq() retrieves the current frequency of > > each > > policy, > > and it determines the CCI frequency based on the frequency of each > > policy. > > > > But if policy-0 and policy-6 enter simultaneously, the CCI governor > > might get an incorrect frequency. > > Yes it may fetch the current frequency (or last known one), but that > shouldn't be a problem as the postchange notification for policy-6 > should get called right after and should fix the issue. Right ? > > I don't think this is a race and if this requires fixing. clk_get() > for any device, will always return the last configured value, while > the clock might be changing at the same time. > > What's important is that you don't get an incorrect frequency (as in > based on intermediate values of registers, etc). Note that the last > configured frequency isn't an incorrect frequency. > > > cpufreq policy-0 -> set frequency -> CCI governor get > > CPUFREQ_POSTCHANGE NB -> choose CCI frequency -> set CCI frequency > > => during this time, the CCI governor gets policy-0 and policy-6, > > BUT > > policy-6 may change frequency by cpufreq driver at the same time. > > Sure, and I don't see a problem with that. The issue is there only if > we can reach a state where CCI is left configured in the wrong state. > Which I don't think would happen here as the postchange notifier will > get called again, forcing a switch of frequency again. > > -- > viresh
On 14-04-25, 08:42, Chun-Jen Tseng (曾俊仁) wrote: > Hi Viresh, > > The CCI level choose by Max_Level(LCPU & BCPU frequency) in devfreq > driver. > without global lock, It may choose wrong CCI level and cause system > stall. > > I hope this flow is serial setting like, BCPU / LCPU set frequency -> > set CCI level -> BCPU / LCPU set frequency -> set CCI level -> ...... > > without global lock, it could be LCPU / BCPU set frequency -> set CCI > level(during this time, it may change BCPU / LCPU frequency and cause > system stall. > > I also can only do global lock on ccifreq_support SoC. As explained earlier, I don't think there is a race here. May be I am wrong. And so I need a clear code path example from you, which proves that there is a race here. -- viresh
On Wed, Apr 16, 2025 at 4:07 PM Viresh Kumar <viresh.kumar@linaro.org> wrote: > > On 14-04-25, 08:42, Chun-Jen Tseng (曾俊仁) wrote: > > Hi Viresh, > > > > The CCI level choose by Max_Level(LCPU & BCPU frequency) in devfreq > > driver. > > without global lock, It may choose wrong CCI level and cause system > > stall. > > > > I hope this flow is serial setting like, BCPU / LCPU set frequency -> > > set CCI level -> BCPU / LCPU set frequency -> set CCI level -> ...... > > > > without global lock, it could be LCPU / BCPU set frequency -> set CCI > > level(during this time, it may change BCPU / LCPU frequency and cause > > system stall. > > > > I also can only do global lock on ccifreq_support SoC. > > As explained earlier, I don't think there is a race here. May be I am > wrong. And so I need a clear code path example from you, which proves > that there is a race here. Maybe a different set of eyes will help. I talked to Chun-Jen offline, and I'll try to explain what I understand. First of all, the issue lies not in cpufreq, but in the CCI devfreq, and how the passive devfreq governor is linked to cpufreq. The CCI hardware unit on the MT8186 is sensitive to frequency changes. If the performance level of the CCI unit is much lower than either of the CPU clusters, it will hard hang the whole system. So the CCI devfreq must always take into account the performance level of both clusters, or in other words the settings of both cpufreq policies. Since the cpufreq policies only serialize with themselves, it is possible for one policy to change and trigger a devfreq update, and when the CCI devfreq driver is doing its calculations, the other policy changes and causes a big deviation from the assumed performance levels, leaving the CCI into a non-matching performance level and causing a system hang. So I think we need to handle CPUFREQ_PRECHANGE events for the frequency increase direction, as well as enlarging the devfreq mutex to cover the CPU frequency tracking bits in the passive governor. I hope that makes sense. Thanks ChenYu
On 28-08-25, 15:26, Chen-Yu Tsai wrote: > Maybe a different set of eyes will help. I talked to Chun-Jen offline, > and I'll try to explain what I understand. > > First of all, the issue lies not in cpufreq, but in the CCI devfreq, > and how the passive devfreq governor is linked to cpufreq. > > The CCI hardware unit on the MT8186 is sensitive to frequency changes. > If the performance level of the CCI unit is much lower than either > of the CPU clusters, it will hard hang the whole system. So the CCI > devfreq must always take into account the performance level of both > clusters, or in other words the settings of both cpufreq policies. > > Since the cpufreq policies only serialize with themselves, it is possible > for one policy to change and trigger a devfreq update, and when the > CCI devfreq driver is doing its calculations, the other policy changes > and causes a big deviation from the assumed performance levels, leaving the > CCI into a non-matching performance level and causing a system hang. > > So I think we need to handle CPUFREQ_PRECHANGE events for the frequency > increase direction, as well as enlarging the devfreq mutex to cover > the CPU frequency tracking bits in the passive governor. > > I hope that makes sense. If some sort of serialization is required in the CCI driver, then a lock must be present there to prevent the issues. -- viresh
© 2016 - 2026 Red Hat, Inc.