[PATCH] nvme/tcp: Add wq_unbound modparam for nvme_tcp_wq

Posted by Li Feng 1 year, 11 months ago
The default nvme_tcp_wq uses all CPUs to process work items. Sometimes it is
necessary to set the CPU affinity explicitly to improve performance.

A new module parameter, wq_unbound, is added here. If set to true, users can
configure the CPU affinity of the workqueue through
/sys/devices/virtual/workqueue/nvme_tcp_wq/cpumask.
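
For example (illustrative only; the mask value depends on the system), the
workqueue could be restricted to CPUs 0-3 with:

  modprobe nvme-tcp wq_unbound=1
  echo f > /sys/devices/virtual/workqueue/nvme_tcp_wq/cpumask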

Signed-off-by: Li Feng <fengli@smartx.com>
---
 drivers/nvme/host/tcp.c | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a6d596e05602..5eaa275f436f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -36,6 +36,14 @@ static int so_priority;
 module_param(so_priority, int, 0644);
 MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
 
+/*
+ * Use the unbound workqueue for nvme_tcp_wq, then we can set the cpu affinity
+ * from sysfs.
+ */
+static bool wq_unbound;
+module_param(wq_unbound, bool, 0644);
+MODULE_PARM_DESC(wq_unbound, "set unbound flag for nvme tcp work queue");
+
 /*
  * TLS handshake timeout
  */
@@ -1551,7 +1559,10 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
 	else if (nvme_tcp_poll_queue(queue))
 		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
 				ctrl->io_queues[HCTX_TYPE_READ] - 1;
-	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+	if (wq_unbound)
+		queue->io_cpu = WORK_CPU_UNBOUND;
+	else
+		queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
 }
 
 static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
@@ -2790,6 +2801,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
 
 static int __init nvme_tcp_init_module(void)
 {
+	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI;
+
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
@@ -2799,8 +2812,10 @@ static int __init nvme_tcp_init_module(void)
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
 	BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
 
-	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
-			WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+	if (wq_unbound)
+		wq_flags |= WQ_UNBOUND | WQ_SYSFS;
+
+	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
 	if (!nvme_tcp_wq)
 		return -ENOMEM;
 
-- 
2.44.0
Re: [PATCH] nvme/tcp: Add wq_unbound modparam for nvme_tcp_wq
Posted by Sagi Grimberg 1 year, 11 months ago

On 13/03/2024 10:55, Li Feng wrote:
> The default nvme_tcp_wq uses all CPUs to process work items. Sometimes it is
> necessary to set the CPU affinity explicitly to improve performance.
>
> A new module parameter, wq_unbound, is added here. If set to true, users can
> configure the CPU affinity of the workqueue through
> /sys/devices/virtual/workqueue/nvme_tcp_wq/cpumask.
>
> Signed-off-by: Li Feng <fengli@smartx.com>
> ---
>   drivers/nvme/host/tcp.c | 21 ++++++++++++++++++---
>   1 file changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
> index a6d596e05602..5eaa275f436f 100644
> --- a/drivers/nvme/host/tcp.c
> +++ b/drivers/nvme/host/tcp.c
> @@ -36,6 +36,14 @@ static int so_priority;
>   module_param(so_priority, int, 0644);
>   MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
>   
> +/*
> + * Use the unbound workqueue for nvme_tcp_wq, then we can set the cpu affinity
> + * from sysfs.
> + */
> +static bool wq_unbound;
> +module_param(wq_unbound, bool, 0644);
> +MODULE_PARM_DESC(wq_unbound, "set unbound flag for nvme tcp work queue");

"Use unbound workqueue for nvme-tcp IO context (default false)"

> +
>   /*
>    * TLS handshake timeout
>    */
> @@ -1551,7 +1559,10 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
>   	else if (nvme_tcp_poll_queue(queue))
>   		n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
>   				ctrl->io_queues[HCTX_TYPE_READ] - 1;
> -	queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
> +	if (wq_unbound)
> +		queue->io_cpu = WORK_CPU_UNBOUND;
> +	else
> +		queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
>   }
>   
>   static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
> @@ -2790,6 +2801,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
>   
>   static int __init nvme_tcp_init_module(void)
>   {
> +	unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI;
> +
>   	BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
>   	BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
>   	BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
> @@ -2799,8 +2812,10 @@ static int __init nvme_tcp_init_module(void)
>   	BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
>   	BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
>   
> -	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
> -			WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
> +	if (wq_unbound)
> +		wq_flags |= WQ_UNBOUND | WQ_SYSFS;

I think we should have WQ_SYSFS exposed always. Add it in a separate patch
that comes before this one.
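
That preparatory change could be as small as always passing the flag to the
existing allocation, e.g. (illustrative sketch only):

	/* expose nvme_tcp_wq attributes in sysfs unconditionally */
	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
			WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS, 0);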

> +
> +	nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
>   	if (!nvme_tcp_wq)
>   		return -ENOMEM;
>