workqueue_apply_unbound_cpumask() cannot proceed with an ordered
workqueue if its dfl_pwq is still frozen. Simply sleeping and waiting for
it to be thawed may not work in some cases if pwq_release_workfn() is
somehow prevented from being called due to resources (e.g. wq_pool_mutex)
that are held by its caller.
To break the logjam, we have to actively check if the frozen dfl_pwq
is ready to be thawed and call thaw_pwq() directly if so.
Signed-off-by: Waiman Long <longman@redhat.com>
---
kernel/workqueue.c | 30 ++++++++++++++++++++++++++++--
1 file changed, 28 insertions(+), 2 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f089e532758a..ee934c2c6ea8 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -6354,6 +6354,32 @@ void thaw_workqueues(void)
}
#endif /* CONFIG_FREEZER */
+/*
+ * Check the given ordered workqueue to see if its non-default pwq's have
+ * zero reference count and if so thaw the frozen default pwq.
+ *
+ * Return:
+ * %true if dfl_pwq has been thawed or %false otherwise.
+ */
+static bool ordered_workqueue_ref_check(struct workqueue_struct *wq)
+{
+ int refs = 0;
+ struct pool_workqueue *pwq;
+
+ if (!READ_ONCE(wq->dfl_pwq->frozen))
+ return true;
+ mutex_lock(&wq->mutex);
+ for_each_pwq(pwq, wq) {
+ if (pwq == wq->dfl_pwq)
+ continue;
+ refs += pwq->refcnt;
+ }
+ if (!refs)
+ thaw_pwq(wq->dfl_pwq);
+ mutex_unlock(&wq->mutex);
+ return !refs;
+}
+
static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
{
LIST_HEAD(ctxs);
@@ -6378,12 +6404,12 @@ static int workqueue_apply_unbound_cpumask(const cpumask_var_t unbound_cpumask)
if (!(wq->flags & __WQ_ORDERED_EXPLICIT)) {
wq->flags &= ~__WQ_ORDERED;
- } else if (pwq && pwq->frozen) {
+ } else if (pwq && !ordered_workqueue_ref_check(wq)) {
int i;
for (i = 0; i < 10; i++) {
msleep(10);
- if (!pwq->frozen)
+ if (ordered_workqueue_ref_check(wq))
break;
}
if (WARN_ON_ONCE(pwq->frozen))
--
2.39.3
Hi Waiman,
kernel test robot noticed the following build warnings:
[auto build test WARNING on tj-wq/for-next]
[also build test WARNING on next-20240202]
[cannot apply to linus/master v6.8-rc2]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]
url: https://github.com/intel-lab-lkp/linux/commits/Waiman-Long/workqueue-Skip-__WQ_DESTROYING-workqueues-when-updating-global-unbound-cpumask/20240203-234626
base: https://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git for-next
patch link: https://lore.kernel.org/r/20240203154334.791910-4-longman%40redhat.com
patch subject: [PATCH-wq v2 3/5] workqueue: Thaw frozen pwq in workqueue_apply_unbound_cpumask()
config: x86_64-randconfig-122-20240204 (https://download.01.org/0day-ci/archive/20240204/202402041854.YeHAF3wV-lkp@intel.com/config)
compiler: gcc-9 (Debian 9.3.0-22) 9.3.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240204/202402041854.YeHAF3wV-lkp@intel.com/reproduce)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202402041854.YeHAF3wV-lkp@intel.com/
sparse warnings: (new ones prefixed by >>)
kernel/workqueue.c:361:40: sparse: sparse: duplicate [noderef]
kernel/workqueue.c:361:40: sparse: sparse: multiple address spaces given: __percpu & __rcu
>> kernel/workqueue.c:6373:25: sparse: sparse: incompatible types in comparison expression (different address spaces):
kernel/workqueue.c:6373:25: sparse: struct pool_workqueue *
kernel/workqueue.c:6373:25: sparse: struct pool_workqueue [noderef] __rcu *
vim +6373 kernel/workqueue.c
6356
6357 /*
6358 * Check the given ordered workqueue to see if its non-default pwq's have
6359 * zero reference count and if so thaw the frozen default pwq.
6360 *
6361 * Return:
6362 * %true if dfl_pwq has been thawed or %false otherwise.
6363 */
6364 static bool ordered_workqueue_ref_check(struct workqueue_struct *wq)
6365 {
6366 int refs = 0;
6367 struct pool_workqueue *pwq;
6368
6369 if (!READ_ONCE(wq->dfl_pwq->frozen))
6370 return true;
6371 mutex_lock(&wq->mutex);
6372 for_each_pwq(pwq, wq) {
> 6373 if (pwq == wq->dfl_pwq)
6374 continue;
6375 refs += pwq->refcnt;
6376 }
6377 if (!refs)
6378 thaw_pwq(wq->dfl_pwq);
6379 mutex_unlock(&wq->mutex);
6380 return !refs;
6381 }
6382
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
On 2/4/24 05:15, kernel test robot wrote: > Hi Waiman, > > kernel test robot noticed the following build warnings: > > [auto build test WARNING on tj-wq/for-next] > [also build test WARNING on next-20240202] > [cannot apply to linus/master v6.8-rc2] > [If your patch is applied to the wrong git tree, kindly drop us a note. > And when submitting patch, we suggest to use '--base' as documented in > https://git-scm.com/docs/git-format-patch#_base_tree_information] > > url: https://github.com/intel-lab-lkp/linux/commits/Waiman-Long/workqueue-Skip-__WQ_DESTROYING-workqueues-when-updating-global-unbound-cpumask/20240203-234626 > base: https://git.kernel.org/pub/scm/linux/kernel/git/tj/wq.git for-next > patch link: https://lore.kernel.org/r/20240203154334.791910-4-longman%40redhat.com > patch subject: [PATCH-wq v2 3/5] workqueue: Thaw frozen pwq in workqueue_apply_unbound_cpumask() > config: x86_64-randconfig-122-20240204 (https://download.01.org/0day-ci/archive/20240204/202402041854.YeHAF3wV-lkp@intel.com/config) > compiler: gcc-9 (Debian 9.3.0-22) 9.3.0 > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20240204/202402041854.YeHAF3wV-lkp@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. 
not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202402041854.YeHAF3wV-lkp@intel.com/ > > sparse warnings: (new ones prefixed by >>) > kernel/workqueue.c:361:40: sparse: sparse: duplicate [noderef] > kernel/workqueue.c:361:40: sparse: sparse: multiple address spaces given: __percpu & __rcu >>> kernel/workqueue.c:6373:25: sparse: sparse: incompatible types in comparison expression (different address spaces): > kernel/workqueue.c:6373:25: sparse: struct pool_workqueue * > kernel/workqueue.c:6373:25: sparse: struct pool_workqueue [noderef] __rcu * OK, I didn't realize that a __rcu tag is added to the dfl_pwq in 6.9. Will change the patch series to use the appropriate helpers to avoid this kind of warning. Cheers, Longman
© 2016 - 2026 Red Hat, Inc.