[PATCH v2] block: plug attempts to batch allocate tags multiple times

Xue He posted 1 patch 2 weeks ago
There is a newer version of this series
block/blk-mq.c | 43 +++++++++++++++++++++++--------------------
lib/sbitmap.c  |  7 ++++---
2 files changed, 27 insertions(+), 23 deletions(-)
[PATCH v2] block: plug attempts to batch allocate tags multiple times
Posted by Xue He 2 weeks ago
In the existing plug mechanism, tags are allocated in batches based on
the number of requests. However, testing has shown that the plug only
attempts batch allocation of tags once at the beginning of a batch of
I/O operations. Since the tag_mask does not always have enough available
tags to satisfy the requested number, a full batch allocation is not
guaranteed to succeed each time. The remaining tags are then allocated
individually (which occurs frequently), leading to multiple single-tag
allocation overheads.

This patch aims to retry batch allocation of tags when the initial batch
allocation fails to reach the requested number, thereby reducing the
overhead of individual allocation attempts.

--------------------------------------------------------------------
perf:
base code: __blk_mq_alloc_requests() 1.33%
patch: __blk_mq_alloc_requests() 0.72%
-------------------------------------------------------------------

Signed-off-by: hexue <xue01.he@samsung.com>
---
 block/blk-mq.c | 43 +++++++++++++++++++++++--------------------
 lib/sbitmap.c  |  7 ++++---
 2 files changed, 27 insertions(+), 23 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ba3a4b77f578..3ed8da176831 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -456,28 +456,31 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
 	struct blk_mq_tags *tags;
 	struct request *rq;
 	unsigned long tag_mask;
-	int i, nr = 0;
 
-	tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
-	if (unlikely(!tag_mask))
-		return NULL;
+	do {
+		int i, nr = 0;
 
-	tags = blk_mq_tags_from_data(data);
-	for (i = 0; tag_mask; i++) {
-		if (!(tag_mask & (1UL << i)))
-			continue;
-		tag = tag_offset + i;
-		prefetch(tags->static_rqs[tag]);
-		tag_mask &= ~(1UL << i);
-		rq = blk_mq_rq_ctx_init(data, tags, tag);
-		rq_list_add_head(data->cached_rqs, rq);
-		nr++;
-	}
-	if (!(data->rq_flags & RQF_SCHED_TAGS))
-		blk_mq_add_active_requests(data->hctx, nr);
-	/* caller already holds a reference, add for remainder */
-	percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
-	data->nr_tags -= nr;
+		tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
+		if (unlikely(!tag_mask))
+			return NULL;
+
+		tags = blk_mq_tags_from_data(data);
+		for (i = 0; tag_mask; i++) {
+			if (!(tag_mask & (1UL << i)))
+				continue;
+			tag = tag_offset + i;
+			prefetch(tags->static_rqs[tag]);
+			tag_mask &= ~(1UL << i);
+			rq = blk_mq_rq_ctx_init(data, tags, tag);
+			rq_list_add_head(data->cached_rqs, rq);
+			nr++;
+		}
+		if (!(data->rq_flags & RQF_SCHED_TAGS))
+			blk_mq_add_active_requests(data->hctx, nr);
+		/* caller already holds a reference, add for remainder */
+		percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
+		data->nr_tags -= nr;
+	} while (data->nr_tags);
 
 	return rq_list_pop(data->cached_rqs);
 }
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 4d188d05db15..4ac303842aec 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -534,10 +534,11 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
 		unsigned int map_depth = __map_depth(sb, index);
 		unsigned long val;
 
-		sbitmap_deferred_clear(map, 0, 0, 0);
 		val = READ_ONCE(map->word);
-		if (val == (1UL << (map_depth - 1)) - 1)
-			goto next;
+		if (val == (1UL << (map_depth - 1)) - 1) {
+			if (!sbitmap_deferred_clear(map, map_depth, 0, 0))
+				goto next;
+		}
 
 		nr = find_first_zero_bit(&val, map_depth);
 		if (nr + nr_tags <= map_depth) {
-- 
2.34.1
Re: [PATCH v2] block: plug attempts to batch allocate tags multiple times
Posted by Yu Kuai 1 week ago
Hi,

I'm not in the cc list, so it can take some time before I notice this
patch.

在 2025/09/18 15:55, Xue He 写道:
> In the existing plug mechanism, tags are allocated in batches based on
> the number of requests. However, testing has shown that the plug only
> attempts batch allocation of tags once at the beginning of a batch of
> I/O operations. Since the tag_mask does not always have enough available
> tags to satisfy the requested number, a full batch allocation is not
> guaranteed to succeed each time. The remaining tags are then allocated
> individually (occurs frequently), leading to multiple single-tag
> allocation overheads.
> 
> This patch aims to retry batch allocation of tags when the initial batch
> allocation fails to reach the requested number, thereby reducing the
> overhead of individual allocation attempts.
> 
> --------------------------------------------------------------------
> perf:
> base code: __blk_mq_alloc_requests() 1.33%
> patch:__blk_mq_alloc_requests() 0.72%
> -------------------------------------------------------------------
> 
> Signed-off-by: hexue <xue01.he@samsung.com>
> ---

Please add change log.

>   block/blk-mq.c | 43 +++++++++++++++++++++++--------------------
>   lib/sbitmap.c  |  7 ++++---
>   2 files changed, 27 insertions(+), 23 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index ba3a4b77f578..3ed8da176831 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -456,28 +456,31 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
>   	struct blk_mq_tags *tags;
>   	struct request *rq;
>   	unsigned long tag_mask;
> -	int i, nr = 0;
>   
> -	tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
> -	if (unlikely(!tag_mask))
> -		return NULL;
> +	do {
> +		int i, nr = 0;
>   
> -	tags = blk_mq_tags_from_data(data);
> -	for (i = 0; tag_mask; i++) {
> -		if (!(tag_mask & (1UL << i)))
> -			continue;
> -		tag = tag_offset + i;
> -		prefetch(tags->static_rqs[tag]);
> -		tag_mask &= ~(1UL << i);
> -		rq = blk_mq_rq_ctx_init(data, tags, tag);
> -		rq_list_add_head(data->cached_rqs, rq);
> -		nr++;
> -	}
> -	if (!(data->rq_flags & RQF_SCHED_TAGS))
> -		blk_mq_add_active_requests(data->hctx, nr);
> -	/* caller already holds a reference, add for remainder */
> -	percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
> -	data->nr_tags -= nr;
> +		tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
> +		if (unlikely(!tag_mask))
> +			return NULL;
> +
> +		tags = blk_mq_tags_from_data(data);
> +		for (i = 0; tag_mask; i++) {
> +			if (!(tag_mask & (1UL << i)))
> +				continue;
> +			tag = tag_offset + i;
> +			prefetch(tags->static_rqs[tag]);
> +			tag_mask &= ~(1UL << i);
> +			rq = blk_mq_rq_ctx_init(data, tags, tag);
> +			rq_list_add_head(data->cached_rqs, rq);
> +			nr++;
> +		}
> +		if (!(data->rq_flags & RQF_SCHED_TAGS))
> +			blk_mq_add_active_requests(data->hctx, nr);
> +		/* caller already holds a reference, add for remainder */
> +		percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);

This should be moved outside of the loop; the remainder handling is a
one-time thing.

> +		data->nr_tags -= nr;
> +	} while (data->nr_tags);
>   
>   	return rq_list_pop(data->cached_rqs);
>   }
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 4d188d05db15..4ac303842aec 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -534,10 +534,11 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
>   		unsigned int map_depth = __map_depth(sb, index);
>   		unsigned long val;
>   
> -		sbitmap_deferred_clear(map, 0, 0, 0);
>   		val = READ_ONCE(map->word);
> -		if (val == (1UL << (map_depth - 1)) - 1)
> -			goto next;
> +		if (val == (1UL << (map_depth - 1)) - 1) {
> +			if (!sbitmap_deferred_clear(map, map_depth, 0, 0))
> +				goto next;

This looks wrong: you're still using the old val after
sbitmap_deferred_clear().
> +		}
>   
>   		nr = find_first_zero_bit(&val, map_depth);
>   		if (nr + nr_tags <= map_depth) {
> 

And I think that if the above check fails, sbitmap_deferred_clear() should
be called and the allocation retried as well.

Thanks,
Kuai

Re: [PATCH] block: plug attempts to batch allocate tags multiple times
Posted by Xue He 1 week ago
On 2025/09/24 18:23, Yu Kuai wrote:
>On 2025/09/18 15:55, Xue He wrote:
>Hi,
>
>I'm not in the cc list, so it can take some time before I notice this
>patch.

Oh sorry, I missed this. I'll add you to the cc list and resend the v3 patch,
thank you.
Re: [PATCH] block: plug attempts to batch allocate tags multiple times
Posted by Xue He 1 week, 1 day ago
>+		if (val == (1UL << (map_depth - 1)) - 1) {
>+			if (!sbitmap_deferred_clear(map, map_depth, 0, 0))
>+				goto next;
>+		}
> 
> 		nr = find_first_zero_bit(&val, map_depth);
> 		if (nr + nr_tags <= map_depth) {

Hi, just a kindly ping...