[PATCH v4] nvme_core: scan namespaces asynchronously

Stuart Hayes posted 1 patch 1 year, 5 months ago
drivers/nvme/host/core.c | 40 +++++++++++++++++++++++++++++++++++++++-
1 file changed, 39 insertions(+), 1 deletion(-)
[PATCH v4] nvme_core: scan namespaces asynchronously
Posted by Stuart Hayes 1 year, 5 months ago
Use async function calls to make namespace scanning happen in parallel.

Without the patch, NVME namespaces are scanned serially, so it can take
a long time for all of a controller's namespaces to become available,
especially with a slower (TCP) interface with large number of
namespaces.

It is not uncommon to have large numbers (hundreds or thousands) of
namespaces on nvme-of with storage servers.

The time it took for all namespaces to show up after connecting (via
TCP) to a controller with 1002 namespaces was measured on one system:

network latency   without patch   with patch
     0                 6s            1s
    50ms             210s           10s
   100ms             417s           18s

Measurements taken on another system show the effect of the patch on the
time nvme_scan_work() took to complete, when connecting to a linux
nvme-of target with varying numbers of namespaces, on a network of
400us.

namespaces    without patch   with patch
     1            16ms           14ms
     2            24ms           16ms
     4            49ms           22ms
     8           101ms           33ms
    16           207ms           56ms
   100           1.4s           0.6s
  1000          12.9s           2.0s

On the same system, connecting to a local PCIe NVMe drive (a Samsung
PM1733) instead of a network target:

namespaces    without patch   with patch
     1            13ms           12ms
     2            41ms           13ms

Signed-off-by: Stuart Hayes <stuart.w.hayes@gmail.com>
---
changes from V3:
  * changed "next_idx" to "next_nsid" in async_scan_info (cosmetic)
  * added comments to struct async_scan_info
  * changed nvme_scan_ns_list() to use local ns_list pointer

changes from V2:
  * make a separate function nvme_scan_ns_async() that calls
    nvme_scan_ns(), instead of modifying nvme_scan_ns()
  * only scan asynchronously from nvme_scan_ns_list(), not from
    nvme_scan_ns_sequential()
  * provide more timing data in the commit message

changes from V1:
  * remove module param to enable/disable async scanning
  * add scan time measurements to commit message

 drivers/nvme/host/core.c | 40 +++++++++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 782090ce0bc1..dc43146dc03d 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4,6 +4,7 @@
  * Copyright (c) 2011-2014, Intel Corporation.
  */
 
+#include <linux/async.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
 #include <linux/blk-integrity.h>
@@ -3952,6 +3953,35 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	}
 }
 
+/*
+ * struct async_scan_info - keeps track of controller & NSIDs to scan
+ * @ctrl:	Controller on which namespaces are being scanned
+ * @next_nsid:	Index of next NSID to scan in ns_list
+ * @ns_list:	Pointer to list of NSIDs to scan
+ *
+ * Note: There is a single async_scan_info structure shared by all instances
+ * of nvme_scan_ns_async() scanning a given controller, so the atomic
+ * operations on next_nsid are critical to ensure each instance scans a unique
+ * NSID.
+ */
+struct async_scan_info {
+	struct nvme_ctrl *ctrl;
+	atomic_t next_nsid;
+	__le32 *ns_list;
+};
+
+static void nvme_scan_ns_async(void *data, async_cookie_t cookie)
+{
+	struct async_scan_info *scan_info = data;
+	int idx;
+	u32 nsid;
+
+	idx = (u32)atomic_fetch_add(1, &scan_info->next_nsid);
+	nsid = le32_to_cpu(scan_info->ns_list[idx]);
+
+	nvme_scan_ns(scan_info->ctrl, nsid);
+}
+
 static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
 					unsigned nsid)
 {
@@ -3978,11 +4008,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
 	__le32 *ns_list;
 	u32 prev = 0;
 	int ret = 0, i;
+	ASYNC_DOMAIN(domain);
+	struct async_scan_info scan_info;
 
 	ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
 	if (!ns_list)
 		return -ENOMEM;
 
+	scan_info.ctrl = ctrl;
+	scan_info.ns_list = ns_list;
 	for (;;) {
 		struct nvme_command cmd = {
 			.identify.opcode	= nvme_admin_identify,
@@ -3998,19 +4032,23 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
 			goto free;
 		}
 
+		atomic_set(&scan_info.next_nsid, 0);
 		for (i = 0; i < nr_entries; i++) {
 			u32 nsid = le32_to_cpu(ns_list[i]);
 
 			if (!nsid)	/* end of the list? */
 				goto out;
-			nvme_scan_ns(ctrl, nsid);
+			async_schedule_domain(nvme_scan_ns_async, &scan_info,
+						&domain);
 			while (++prev < nsid)
 				nvme_ns_remove_by_nsid(ctrl, prev);
 		}
+		async_synchronize_full_domain(&domain);
 	}
  out:
 	nvme_remove_invalid_namespaces(ctrl, prev);
  free:
+	async_synchronize_full_domain(&domain);
 	kfree(ns_list);
 	return ret;
 }
-- 
2.39.3
Re: [PATCH v4] nvme_core: scan namespaces asynchronously
Posted by Thomas Weißschuh 1 year, 5 months ago
Hi,

two tiny nitpicks that can probably be fixed during patch application.

On 2024-07-17 13:55:50+0000, Stuart Hayes wrote:
> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
> index 782090ce0bc1..dc43146dc03d 100644
> --- a/drivers/nvme/host/core.c
> +++ b/drivers/nvme/host/core.c
> @@ -4,6 +4,7 @@
>   * Copyright (c) 2011-2014, Intel Corporation.
>   */
>  
> +#include <linux/async.h>
>  #include <linux/blkdev.h>
>  #include <linux/blk-mq.h>
>  #include <linux/blk-integrity.h>
> @@ -3952,6 +3953,35 @@ static void nvme_scan_ns(struct nvme_ctrl *ctrl, unsigned nsid)
>  	}
>  }
>  
> +/*

Kernel-doc with /** ?

> + * struct async_scan_info - keeps track of controller & NSIDs to scan
> + * @ctrl:	Controller on which namespaces are being scanned
> + * @next_nsid:	Index of next NSID to scan in ns_list
> + * @ns_list:	Pointer to list of NSIDs to scan
> + *
> + * Note: There is a single async_scan_info structure shared by all instances
> + * of nvme_scan_ns_async() scanning a given controller, so the atomic
> + * operations on next_nsid are critical to ensure each instance scans a unique
> + * NSID.
> + */
> +struct async_scan_info {
> +	struct nvme_ctrl *ctrl;
> +	atomic_t next_nsid;
> +	__le32 *ns_list;
> +};
> +
> +static void nvme_scan_ns_async(void *data, async_cookie_t cookie)
> +{
> +	struct async_scan_info *scan_info = data;
> +	int idx;
> +	u32 nsid;
> +
> +	idx = (u32)atomic_fetch_add(1, &scan_info->next_nsid);

atomic_fetch_inc() ?

> +	nsid = le32_to_cpu(scan_info->ns_list[idx]);
> +
> +	nvme_scan_ns(scan_info->ctrl, nsid);
> +}
> +
Re: [PATCH v4] nvme_core: scan namespaces asynchronously
Posted by Keith Busch 1 year, 5 months ago
On Wed, Jul 17, 2024 at 11:16:19PM +0200, Thomas Weißschuh wrote:
> On 2024-07-17 13:55:50+0000, Stuart Hayes wrote:
> > +static void nvme_scan_ns_async(void *data, async_cookie_t cookie)
> > +{
> > +	struct async_scan_info *scan_info = data;
> > +	int idx;
> > +	u32 nsid;
> > +
> > +	idx = (u32)atomic_fetch_add(1, &scan_info->next_nsid);
> 
> atomic_fetch_inc() ?

Good call. Also, that returns an int, and 'idx' is an int too, so the
(u32) cast inbetween is unnecessary. The highest 'next_nsid' could
possibly be anyway is 1023, so int is fine.
Re: [PATCH v4] nvme_core: scan namespaces asynchronously
Posted by Keith Busch 1 year, 5 months ago
On Wed, Jul 17, 2024 at 01:55:50PM -0500, Stuart Hayes wrote:

Looks good to me. Just one minor comment below.

> @@ -3978,11 +4008,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
>  	__le32 *ns_list;
>  	u32 prev = 0;
>  	int ret = 0, i;
> +	ASYNC_DOMAIN(domain);
> +	struct async_scan_info scan_info;
>  
>  	ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
>  	if (!ns_list)
>  		return -ENOMEM;
>  
> +	scan_info.ctrl = ctrl;
> +	scan_info.ns_list = ns_list;
>  	for (;;) {
>  		struct nvme_command cmd = {
>  			.identify.opcode	= nvme_admin_identify,
> @@ -3998,19 +4032,23 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
>  			goto free;
>  		}
>  
> +		atomic_set(&scan_info.next_nsid, 0);
>  		for (i = 0; i < nr_entries; i++) {
>  			u32 nsid = le32_to_cpu(ns_list[i]);
>  
>  			if (!nsid)	/* end of the list? */
>  				goto out;
> -			nvme_scan_ns(ctrl, nsid);
> +			async_schedule_domain(nvme_scan_ns_async, &scan_info,
> +						&domain);
>  			while (++prev < nsid)
>  				nvme_ns_remove_by_nsid(ctrl, prev);
>  		}
> +		async_synchronize_full_domain(&domain);
>  	}
>   out:
>  	nvme_remove_invalid_namespaces(ctrl, prev);
>   free:
> +	async_synchronize_full_domain(&domain);

A goto this "free" label appears to mean the async domain has nothing
scheduled, so synchronizing won't be necessary. This should be moved up
tharmlesso the "out" label, I think.

I can change that up when applying; just wanted to mention it in case
I'm missing something.
Re: [PATCH v4] nvme_core: scan namespaces asynchronously
Posted by stuart hayes 1 year, 5 months ago

On 7/17/2024 3:46 PM, Keith Busch wrote:
> On Wed, Jul 17, 2024 at 01:55:50PM -0500, Stuart Hayes wrote:
> 
> Looks good to me. Just one minor comment below.
> 
>> @@ -3978,11 +4008,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
>>   	__le32 *ns_list;
>>   	u32 prev = 0;
>>   	int ret = 0, i;
>> +	ASYNC_DOMAIN(domain);
>> +	struct async_scan_info scan_info;
>>   
>>   	ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
>>   	if (!ns_list)
>>   		return -ENOMEM;
>>   
>> +	scan_info.ctrl = ctrl;
>> +	scan_info.ns_list = ns_list;
>>   	for (;;) {
>>   		struct nvme_command cmd = {
>>   			.identify.opcode	= nvme_admin_identify,
>> @@ -3998,19 +4032,23 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
>>   			goto free;
>>   		}
>>   
>> +		atomic_set(&scan_info.next_nsid, 0);
>>   		for (i = 0; i < nr_entries; i++) {
>>   			u32 nsid = le32_to_cpu(ns_list[i]);
>>   
>>   			if (!nsid)	/* end of the list? */
>>   				goto out;
>> -			nvme_scan_ns(ctrl, nsid);
>> +			async_schedule_domain(nvme_scan_ns_async, &scan_info,
>> +						&domain);
>>   			while (++prev < nsid)
>>   				nvme_ns_remove_by_nsid(ctrl, prev);
>>   		}
>> +		async_synchronize_full_domain(&domain);
>>   	}
>>    out:
>>   	nvme_remove_invalid_namespaces(ctrl, prev);
>>    free:
>> +	async_synchronize_full_domain(&domain);
> 
> A goto this "free" label appears to mean the async domain has nothing
> scheduled, so synchronizing won't be necessary. This should be moved up
> tharmlesso the "out" label, I think.
> 
> I can change that up when applying; just wanted to mention it in case
> I'm missing something.

I agree, you aren't missing anything.  Thank you very much!  If you want
me to submit a new patch with this and Thomas' changes, let me know.
Re: [PATCH v4] nvme_core: scan namespaces asynchronously
Posted by Keith Busch 1 year, 5 months ago
On Wed, Jul 17, 2024 at 09:10:23PM -0500, stuart hayes wrote:
> I agree, you aren't missing anything.  Thank you very much!  If you want
> me to submit a new patch with this and Thomas' changes, let me know.

No problem, I've folded in Thomas's suggestions as well, pushed to
nvme-6.11. Thanks!
Re: [PATCH v4] nvme_core: scan namespaces asynchronously
Posted by Keith Busch 1 year, 4 months ago
On Thu, Jul 18, 2024 at 01:14:20PM -0600, Keith Busch wrote:
> On Wed, Jul 17, 2024 at 09:10:23PM -0500, stuart hayes wrote:
> > I agree, you aren't missing anything.  Thank you very much!  If you want
> > me to submit a new patch with this and Thomas' changes, let me know.
> 
> No problem, I've folded in Thomas's suggestions as well, pushed to
> nvme-6.11. Thanks!

On further consideration, this is a bit late for 6.11 inclusion. This
one could use time in linux-next, so it's top of my stack for 6.12 once
that branch gets created.
Re: [PATCH v4] nvme_core: scan namespaces asynchronously
Posted by Sagi Grimberg 1 year, 5 months ago
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>