[PATCH] workqueue: Make sure that wq_unbound_cpumask is never empty

Tejun Heo posted 1 patch 2 years ago
kernel/workqueue.c |   22 +++++++++++++++-------
1 file changed, 15 insertions(+), 7 deletions(-)
[PATCH] workqueue: Make sure that wq_unbound_cpumask is never empty
Posted by Tejun Heo 2 years ago
During boot, depending on how the housekeeping and workqueue.unbound_cpus
masks are set, wq_unbound_cpumask can end up empty. Since 8639ecebc9b1
("workqueue: Implement non-strict affinity scope for unbound workqueues"),
this may end up feeding -1 as a CPU number into scheduler leading to oopses.

  BUG: unable to handle page fault for address: ffffffff8305e9c0
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  ...
  Call Trace:
   <TASK>
   select_idle_sibling+0x79/0xaf0
   select_task_rq_fair+0x1cb/0x7b0
   try_to_wake_up+0x29c/0x5c0
   wake_up_process+0x19/0x20
   kick_pool+0x5e/0xb0
   __queue_work+0x119/0x430
   queue_work_on+0x29/0x30
  ...

An empty wq_unbound_cpumask is a clear misconfiguration and already
disallowed once system is booted up. Let's warn on and ignore
unbound_cpumask restrictions which lead to no unbound cpus. While at it,
also remove now unncessary empty check on wq_unbound_cpumask in
wq_select_unbound_cpu().

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Yong He <alexyonghe@tencent.com>
Link: http://lkml.kernel.org/r/20231120121623.119780-1-alexyonghe@tencent.com
Fixes: 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues")
Cc: stable@vger.kernel.org # v6.6+
---
Hello,

Yong He, zhuangel570, can you please verify that this patch makes the oops
go away? Waiman, this touches code that you've recently worked on. AFAICS,
they shouldn't interact or cause conflicts. cc'ing just in case.

Thanks.

 kernel/workqueue.c |   22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 6e578f576a6f..0295291d54bc 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1684,9 +1684,6 @@ static int wq_select_unbound_cpu(int cpu)
 		pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n");
 	}
 
-	if (cpumask_empty(wq_unbound_cpumask))
-		return cpu;
-
 	new_cpu = __this_cpu_read(wq_rr_cpu_last);
 	new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
 	if (unlikely(new_cpu >= nr_cpu_ids)) {
@@ -6515,6 +6512,17 @@ static inline void wq_watchdog_init(void) { }
 
 #endif	/* CONFIG_WQ_WATCHDOG */
 
+static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask)
+{
+	if (!cpumask_intersects(wq_unbound_cpumask, mask)) {
+		pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n",
+			cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask));
+		return;
+	}
+
+	cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask);
+}
+
 /**
  * workqueue_init_early - early init for workqueue subsystem
  *
@@ -6534,11 +6542,11 @@ void __init workqueue_init_early(void)
 	BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
 
 	BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
-	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
-	cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
-
+	cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
+	restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ));
+	restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN));
 	if (!cpumask_empty(&wq_cmdline_cpumask))
-		cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask);
+		restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask);
 
 	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
Re: [PATCH] workqueue: Make sure that wq_unbound_cpumask is never empty
Posted by Tejun Heo 2 years ago
On Tue, Nov 21, 2023 at 11:39:36AM -1000, Tejun Heo wrote:
> During boot, depending on how the housekeeping and workqueue.unbound_cpus
> masks are set, wq_unbound_cpumask can end up empty. Since 8639ecebc9b1
> ("workqueue: Implement non-strict affinity scope for unbound workqueues"),
> this may end up feeding -1 as a CPU number into scheduler leading to oopses.
> 
>   BUG: unable to handle page fault for address: ffffffff8305e9c0
>   #PF: supervisor read access in kernel mode
>   #PF: error_code(0x0000) - not-present page
>   ...
>   Call Trace:
>    <TASK>
>    select_idle_sibling+0x79/0xaf0
>    select_task_rq_fair+0x1cb/0x7b0
>    try_to_wake_up+0x29c/0x5c0
>    wake_up_process+0x19/0x20
>    kick_pool+0x5e/0xb0
>    __queue_work+0x119/0x430
>    queue_work_on+0x29/0x30
>   ...
> 
> An empty wq_unbound_cpumask is a clear misconfiguration and already
> disallowed once system is booted up. Let's warn on and ignore
> unbound_cpumask restrictions which lead to no unbound cpus. While at it,
> also remove now unncessary empty check on wq_unbound_cpumask in
> wq_select_unbound_cpu().
> 
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Reported-by: Yong He <alexyonghe@tencent.com>
> Link: http://lkml.kernel.org/r/20231120121623.119780-1-alexyonghe@tencent.com
> Fixes: 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues")
> Cc: stable@vger.kernel.org # v6.6+

Applied to wq/for-6.7-fixes.

Thanks.

-- 
tejun
Re: [PATCH] workqueue: Make sure that wq_unbound_cpumask is never empty
Posted by zhuangel570 2 years ago
On Wed, Nov 22, 2023 at 5:39 AM Tejun Heo <tj@kernel.org> wrote:
>
> During boot, depending on how the housekeeping and workqueue.unbound_cpus
> masks are set, wq_unbound_cpumask can end up empty. Since 8639ecebc9b1
> ("workqueue: Implement non-strict affinity scope for unbound workqueues"),
> this may end up feeding -1 as a CPU number into scheduler leading to oopses.
>
>   BUG: unable to handle page fault for address: ffffffff8305e9c0
>   #PF: supervisor read access in kernel mode
>   #PF: error_code(0x0000) - not-present page
>   ...
>   Call Trace:
>    <TASK>
>    select_idle_sibling+0x79/0xaf0
>    select_task_rq_fair+0x1cb/0x7b0
>    try_to_wake_up+0x29c/0x5c0
>    wake_up_process+0x19/0x20
>    kick_pool+0x5e/0xb0
>    __queue_work+0x119/0x430
>    queue_work_on+0x29/0x30
>   ...
>
> An empty wq_unbound_cpumask is a clear misconfiguration and already
> disallowed once system is booted up. Let's warn on and ignore
> unbound_cpumask restrictions which lead to no unbound cpus. While at it,
> also remove now unncessary empty check on wq_unbound_cpumask in
> wq_select_unbound_cpu().
>
> Signed-off-by: Tejun Heo <tj@kernel.org>
> Reported-by: Yong He <alexyonghe@tencent.com>
> Link: http://lkml.kernel.org/r/20231120121623.119780-1-alexyonghe@tencent.com
> Fixes: 8639ecebc9b1 ("workqueue: Implement non-strict affinity scope for unbound workqueues")
> Cc: stable@vger.kernel.org # v6.6+
> ---
> Hello,
>
> Yong He, zhuangel570, can you please verify that this patch makes the oops
> go away? Waiman, this touches code that you've recently worked on. AFAICS,
> they shouldn't interact or cause conflicts. cc'ing just in case.

Sure.
I port this patch to my 6.7 branch, and the kernel could boot successfully on BM
and VM, with the same configurations, also I can see the new added warning, so
this patch solves the oops.

So, one last check, do you think we still need to check return value from
cpumask_any_distribute() to make sure kick_pool() set a correct wake_cpu?

Tested-by: Yong He <alexyonghe@tencent.com>

>
> Thanks.
>
>  kernel/workqueue.c |   22 +++++++++++++++-------
>  1 file changed, 15 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 6e578f576a6f..0295291d54bc 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -1684,9 +1684,6 @@ static int wq_select_unbound_cpu(int cpu)
>                 pr_warn_once("workqueue: round-robin CPU selection forced, expect performance impact\n");
>         }
>
> -       if (cpumask_empty(wq_unbound_cpumask))
> -               return cpu;
> -
>         new_cpu = __this_cpu_read(wq_rr_cpu_last);
>         new_cpu = cpumask_next_and(new_cpu, wq_unbound_cpumask, cpu_online_mask);
>         if (unlikely(new_cpu >= nr_cpu_ids)) {
> @@ -6515,6 +6512,17 @@ static inline void wq_watchdog_init(void) { }
>
>  #endif /* CONFIG_WQ_WATCHDOG */
>
> +static void __init restrict_unbound_cpumask(const char *name, const struct cpumask *mask)
> +{
> +       if (!cpumask_intersects(wq_unbound_cpumask, mask)) {
> +               pr_warn("workqueue: Restricting unbound_cpumask (%*pb) with %s (%*pb) leaves no CPU, ignoring\n",
> +                       cpumask_pr_args(wq_unbound_cpumask), name, cpumask_pr_args(mask));
> +               return;
> +       }
> +
> +       cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, mask);
> +}
> +
>  /**
>   * workqueue_init_early - early init for workqueue subsystem
>   *
> @@ -6534,11 +6542,11 @@ void __init workqueue_init_early(void)
>         BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
>
>         BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
> -       cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
> -       cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
> -
> +       cpumask_copy(wq_unbound_cpumask, cpu_possible_mask);
> +       restrict_unbound_cpumask("HK_TYPE_WQ", housekeeping_cpumask(HK_TYPE_WQ));
> +       restrict_unbound_cpumask("HK_TYPE_DOMAIN", housekeeping_cpumask(HK_TYPE_DOMAIN));
>         if (!cpumask_empty(&wq_cmdline_cpumask))
> -               cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, &wq_cmdline_cpumask);
> +               restrict_unbound_cpumask("workqueue.unbound_cpus", &wq_cmdline_cpumask);
>
>         pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
>