[v4] Workqueue: add WQ_PERCPU, system_dfl_wq and system_percpu_wq

[PATCH v4 1/3] Workqueue: add system_percpu_wq and system_dfl_wq

Posted by Marco Crivellari 8 months ago

Currently, if a user enqueues a work item using schedule_delayed_work(), the
workqueue used is "system_wq" (a per-CPU wq), while queue_delayed_work() uses
WORK_CPU_UNBOUND (used when a CPU is not specified). The same applies to
schedule_work(), which uses system_wq, and queue_work(), which again makes
use of WORK_CPU_UNBOUND.

This lack of consistency cannot be addressed without refactoring the API.

system_wq is a per-CPU workqueue, yet nothing in its name conveys that
CPU affinity constraint, which is very often not required by users. Make
it clear by adding a system_percpu_wq.

system_unbound_wq should be the default workqueue so as not to enforce
locality constraints for random work whenever it's not required.

Adding system_dfl_wq to encourage its use when unbound work should be used.

Suggested-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
---
 include/linux/workqueue.h | 8 +++++---
 kernel/workqueue.c        | 4 ++++
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index 6e30f275da77..502ec4a5e32c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -427,7 +427,7 @@ enum wq_consts {
 /*
  * System-wide workqueues which are always present.
  *
- * system_wq is the one used by schedule[_delayed]_work[_on]().
+ * system_percpu_wq is the one used by schedule[_delayed]_work[_on]().
  * Multi-CPU multi-threaded.  There are users which expect relatively
  * short queue flush time.  Don't queue works which can run for too
  * long.
@@ -438,7 +438,7 @@ enum wq_consts {
  * system_long_wq is similar to system_wq but may host long running
  * works.  Queue flushing might take relatively long.
  *
- * system_unbound_wq is unbound workqueue.  Workers are not bound to
+ * system_dfl_wq is unbound workqueue.  Workers are not bound to
  * any specific CPU, not concurrency managed, and all queued works are
  * executed immediately as long as max_active limit is not reached and
  * resources are available.
@@ -455,10 +455,12 @@ enum wq_consts {
  * system_bh[_highpri]_wq are convenience interface to softirq. BH work items
  * are executed in the queueing CPU's BH context in the queueing order.
  */
-extern struct workqueue_struct *system_wq;
+extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */
+extern struct workqueue_struct *system_percpu_wq;
 extern struct workqueue_struct *system_highpri_wq;
 extern struct workqueue_struct *system_long_wq;
 extern struct workqueue_struct *system_unbound_wq;
+extern struct workqueue_struct *system_dfl_wq;
 extern struct workqueue_struct *system_freezable_wq;
 extern struct workqueue_struct *system_power_efficient_wq;
 extern struct workqueue_struct *system_freezable_power_efficient_wq;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 97f37b5bae66..7a3f53a9841e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -505,12 +505,16 @@ static struct kthread_worker *pwq_release_worker __ro_after_init;
 
 struct workqueue_struct *system_wq __ro_after_init;
 EXPORT_SYMBOL(system_wq);
+struct workqueue_struct *system_percpu_wq __ro_after_init;
+EXPORT_SYMBOL(system_percpu_wq);
 struct workqueue_struct *system_highpri_wq __ro_after_init;
 EXPORT_SYMBOL_GPL(system_highpri_wq);
 struct workqueue_struct *system_long_wq __ro_after_init;
 EXPORT_SYMBOL_GPL(system_long_wq);
 struct workqueue_struct *system_unbound_wq __ro_after_init;
 EXPORT_SYMBOL_GPL(system_unbound_wq);
+struct workqueue_struct *system_dfl_wq __ro_after_init;
+EXPORT_SYMBOL_GPL(system_dfl_wq);
 struct workqueue_struct *system_freezable_wq __ro_after_init;
 EXPORT_SYMBOL_GPL(system_freezable_wq);
 struct workqueue_struct *system_power_efficient_wq __ro_after_init;
-- 
2.49.0

Re: [PATCH v4 1/3] Workqueue: add system_percpu_wq and system_dfl_wq

Posted by Frederic Weisbecker 8 months ago

Le Thu, Jun 12, 2025 at 03:33:33PM +0200, Marco Crivellari a écrit :
> Currently if a user enqueue a work item using schedule_delayed_work() the
> used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use
> WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to
> schedule_work() that is using system_wq and queue_work(), that makes use
> again of WORK_CPU_UNBOUND.
> 
> This lack of consistentcy cannot be addressed without refactoring the API.
> 
> system_wq is a per-CPU worqueue, yet nothing in its name tells about that
> CPU affinity constraint, which is very often not required by users. Make
> it clear by adding a system_percpu_wq.
> 
> system_unbound_wq should be the default workqueue so as not to enforce
> locality constraints for random work whenever it's not required.
> 
> Adding system_dfl_wq to encourage its use when unbound work should be used.
> 
> Suggested-by: Tejun Heo <tj@kernel.org>
> Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
> ---
>  include/linux/workqueue.h | 8 +++++---
>  kernel/workqueue.c        | 4 ++++
>  2 files changed, 9 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
> index 6e30f275da77..502ec4a5e32c 100644
> --- a/include/linux/workqueue.h
> +++ b/include/linux/workqueue.h
> @@ -427,7 +427,7 @@ enum wq_consts {
>  /*
>   * System-wide workqueues which are always present.
>   *
> - * system_wq is the one used by schedule[_delayed]_work[_on]().
> + * system_percpu_wq is the one used by schedule[_delayed]_work[_on]().
>   * Multi-CPU multi-threaded.  There are users which expect relatively
>   * short queue flush time.  Don't queue works which can run for too
>   * long.
> @@ -438,7 +438,7 @@ enum wq_consts {
>   * system_long_wq is similar to system_wq but may host long running
>   * works.  Queue flushing might take relatively long.
>   *
> - * system_unbound_wq is unbound workqueue.  Workers are not bound to
> + * system_dfl_wq is unbound workqueue.  Workers are not bound to
>   * any specific CPU, not concurrency managed, and all queued works are
>   * executed immediately as long as max_active limit is not reached and
>   * resources are available.
> @@ -455,10 +455,12 @@ enum wq_consts {
>   * system_bh[_highpri]_wq are convenience interface to softirq. BH work items
>   * are executed in the queueing CPU's BH context in the queueing order.
>   */
> -extern struct workqueue_struct *system_wq;
> +extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */
> +extern struct workqueue_struct *system_percpu_wq;
>  extern struct workqueue_struct *system_highpri_wq;
>  extern struct workqueue_struct *system_long_wq;
>  extern struct workqueue_struct *system_unbound_wq;
> +extern struct workqueue_struct *system_dfl_wq;
>  extern struct workqueue_struct *system_freezable_wq;
>  extern struct workqueue_struct *system_power_efficient_wq;
>  extern struct workqueue_struct *system_freezable_power_efficient_wq;
> diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> index 97f37b5bae66..7a3f53a9841e 100644
> --- a/kernel/workqueue.c
> +++ b/kernel/workqueue.c
> @@ -505,12 +505,16 @@ static struct kthread_worker *pwq_release_worker __ro_after_init;
>  
>  struct workqueue_struct *system_wq __ro_after_init;
>  EXPORT_SYMBOL(system_wq);
> +struct workqueue_struct *system_percpu_wq __ro_after_init;
> +EXPORT_SYMBOL(system_percpu_wq);
>  struct workqueue_struct *system_highpri_wq __ro_after_init;
>  EXPORT_SYMBOL_GPL(system_highpri_wq);
>  struct workqueue_struct *system_long_wq __ro_after_init;
>  EXPORT_SYMBOL_GPL(system_long_wq);
>  struct workqueue_struct *system_unbound_wq __ro_after_init;
>  EXPORT_SYMBOL_GPL(system_unbound_wq);
> +struct workqueue_struct *system_dfl_wq __ro_after_init;
> +EXPORT_SYMBOL_GPL(system_dfl_wq);
>  struct workqueue_struct *system_freezable_wq __ro_after_init;
>  EXPORT_SYMBOL_GPL(system_freezable_wq);
>  struct workqueue_struct *system_power_efficient_wq __ro_after_init;

Shouldn't you allocate system_percpu_wq and system_dfl_wq in
workqueue_init_early() ?

And yes, I think we should allocate them and not make them pointers to
system_wq and system_unbound_wq; this way you can more easily
warn about deprecated uses of system_wq and system_unbound_wq in the future,
after upcoming merge windows.

Thanks.

> -- 
> 2.49.0
> 

-- 
Frederic Weisbecker
SUSE Labs

Re: [PATCH v4 1/3] Workqueue: add system_percpu_wq and system_dfl_wq

Posted by Marco Crivellari 8 months ago

Hi Frederic,

I left the wq allocation together with the wq logic changes.
But if it's better to allocate them directly here, where the wq(s) are
added, I will do so.

Thank you.



On Fri, Jun 13, 2025 at 3:05 PM Frederic Weisbecker <frederic@kernel.org> wrote:
>
> Le Thu, Jun 12, 2025 at 03:33:33PM +0200, Marco Crivellari a écrit :
> > Currently if a user enqueue a work item using schedule_delayed_work() the
> > used wq is "system_wq" (per-cpu wq) while queue_delayed_work() use
> > WORK_CPU_UNBOUND (used when a cpu is not specified). The same applies to
> > schedule_work() that is using system_wq and queue_work(), that makes use
> > again of WORK_CPU_UNBOUND.
> >
> > This lack of consistentcy cannot be addressed without refactoring the API.
> >
> > system_wq is a per-CPU worqueue, yet nothing in its name tells about that
> > CPU affinity constraint, which is very often not required by users. Make
> > it clear by adding a system_percpu_wq.
> >
> > system_unbound_wq should be the default workqueue so as not to enforce
> > locality constraints for random work whenever it's not required.
> >
> > Adding system_dfl_wq to encourage its use when unbound work should be used.
> >
> > Suggested-by: Tejun Heo <tj@kernel.org>
> > Signed-off-by: Marco Crivellari <marco.crivellari@suse.com>
> > ---
> >  include/linux/workqueue.h | 8 +++++---
> >  kernel/workqueue.c        | 4 ++++
> >  2 files changed, 9 insertions(+), 3 deletions(-)
> >
> > diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
> > index 6e30f275da77..502ec4a5e32c 100644
> > --- a/include/linux/workqueue.h
> > +++ b/include/linux/workqueue.h
> > @@ -427,7 +427,7 @@ enum wq_consts {
> >  /*
> >   * System-wide workqueues which are always present.
> >   *
> > - * system_wq is the one used by schedule[_delayed]_work[_on]().
> > + * system_percpu_wq is the one used by schedule[_delayed]_work[_on]().
> >   * Multi-CPU multi-threaded.  There are users which expect relatively
> >   * short queue flush time.  Don't queue works which can run for too
> >   * long.
> > @@ -438,7 +438,7 @@ enum wq_consts {
> >   * system_long_wq is similar to system_wq but may host long running
> >   * works.  Queue flushing might take relatively long.
> >   *
> > - * system_unbound_wq is unbound workqueue.  Workers are not bound to
> > + * system_dfl_wq is unbound workqueue.  Workers are not bound to
> >   * any specific CPU, not concurrency managed, and all queued works are
> >   * executed immediately as long as max_active limit is not reached and
> >   * resources are available.
> > @@ -455,10 +455,12 @@ enum wq_consts {
> >   * system_bh[_highpri]_wq are convenience interface to softirq. BH work items
> >   * are executed in the queueing CPU's BH context in the queueing order.
> >   */
> > -extern struct workqueue_struct *system_wq;
> > +extern struct workqueue_struct *system_wq; /* use system_percpu_wq, this will be removed */
> > +extern struct workqueue_struct *system_percpu_wq;
> >  extern struct workqueue_struct *system_highpri_wq;
> >  extern struct workqueue_struct *system_long_wq;
> >  extern struct workqueue_struct *system_unbound_wq;
> > +extern struct workqueue_struct *system_dfl_wq;
> >  extern struct workqueue_struct *system_freezable_wq;
> >  extern struct workqueue_struct *system_power_efficient_wq;
> >  extern struct workqueue_struct *system_freezable_power_efficient_wq;
> > diff --git a/kernel/workqueue.c b/kernel/workqueue.c
> > index 97f37b5bae66..7a3f53a9841e 100644
> > --- a/kernel/workqueue.c
> > +++ b/kernel/workqueue.c
> > @@ -505,12 +505,16 @@ static struct kthread_worker *pwq_release_worker __ro_after_init;
> >
> >  struct workqueue_struct *system_wq __ro_after_init;
> >  EXPORT_SYMBOL(system_wq);
> > +struct workqueue_struct *system_percpu_wq __ro_after_init;
> > +EXPORT_SYMBOL(system_percpu_wq);
> >  struct workqueue_struct *system_highpri_wq __ro_after_init;
> >  EXPORT_SYMBOL_GPL(system_highpri_wq);
> >  struct workqueue_struct *system_long_wq __ro_after_init;
> >  EXPORT_SYMBOL_GPL(system_long_wq);
> >  struct workqueue_struct *system_unbound_wq __ro_after_init;
> >  EXPORT_SYMBOL_GPL(system_unbound_wq);
> > +struct workqueue_struct *system_dfl_wq __ro_after_init;
> > +EXPORT_SYMBOL_GPL(system_dfl_wq);
> >  struct workqueue_struct *system_freezable_wq __ro_after_init;
> >  EXPORT_SYMBOL_GPL(system_freezable_wq);
> >  struct workqueue_struct *system_power_efficient_wq __ro_after_init;
>
> Shouldn't you allocate system_percpu_wq and system_dfl_wq in
> workqueue_init_early() ?
>
> And yes I think we should allocate them and not make them a pointer to
> system_wq and system_unbound_wq, this way you can more easily
> warn deprecated uses of system_wq and system_unbound_wq in the future
> after upcoming merge windows.
>
> Thanks.
>
> > --
> > 2.49.0
> >
>
> --
> Frederic Weisbecker
> SUSE Labs



--

Marco Crivellari

L3 Support Engineer, Technology & Product




marco.crivellari@suse.com

[PATCH v4 1/3] Workqueue: add system_percpu_wq and system_dfl_wq
[PATCH v4 2/3] Workqueue: add new WQ_PERCPU flag
[PATCH v4 3/3] [Doc] Workqueue: add WQ_PERCPU