[PATCH v4] block: plug attempts to batch allocate tags multiple times
Posted by Xue He 2 months, 1 week ago
When IOs are submitted in a batch through the plug mechanism, retry the
batch tag allocation until enough tags have been obtained, instead of
falling back to frequent individual allocations whenever the initial
batch allocation is insufficient.
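
In short, the allocation path changes from a single blk_mq_get_tags()
attempt into a retry loop, roughly (simplified sketch of the diff below):

	/* before: one attempt, the caller falls back to per-request allocation */
	tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
	if (!tag_mask)
		return NULL;

	/* after: keep allocating until data->nr_tags requests are cached
	 * or no more tags are available
	 */
	do {
		tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
		if (!tag_mask)
			break;
		/* init a request for each bit in tag_mask, data->nr_tags-- */
	} while (data->nr_tags);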

------------------------------------------------------------
Perf (overhead of __blk_mq_alloc_requests()):
base code: 1.31%
patch:     0.71%
------------------------------------------------------------

---
changes since v1:
- Consolidate multiple batch allocations into a single loop that
  reaches the requested batch quantity

changes since v2:
- Move the call site of the remainder handling
- Rework when the sbitmap cleanup is performed

changes since v3:
- Add the handling operations inside the loop
- Add the helper sbitmap_find_bits_in_word

Signed-off-by: hexue <xue01.he@samsung.com>
---
 block/blk-mq.c | 39 ++++++++++++++++++---------------
 lib/sbitmap.c  | 58 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 62 insertions(+), 35 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index ba3a4b77f578..695ccc72e69f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -458,26 +458,31 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
 	unsigned long tag_mask;
 	int i, nr = 0;
 
-	tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
-	if (unlikely(!tag_mask))
-		return NULL;
+	do {
+		tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
+		if (unlikely(!tag_mask)) {
+			if (nr == 0)
+				return NULL;
+			break;
+		}
+		tags = blk_mq_tags_from_data(data);
+		for (i = 0; tag_mask; i++) {
+			if (!(tag_mask & (1UL << i)))
+				continue;
+			tag = tag_offset + i;
+			prefetch(tags->static_rqs[tag]);
+			tag_mask &= ~(1UL << i);
+			rq = blk_mq_rq_ctx_init(data, tags, tag);
+			rq_list_add_head(data->cached_rqs, rq);
+			data->nr_tags--;
+			nr++;
+		}
+		if (!(data->rq_flags & RQF_SCHED_TAGS))
+			blk_mq_add_active_requests(data->hctx, nr);
+	} while (data->nr_tags);
 
-	tags = blk_mq_tags_from_data(data);
-	for (i = 0; tag_mask; i++) {
-		if (!(tag_mask & (1UL << i)))
-			continue;
-		tag = tag_offset + i;
-		prefetch(tags->static_rqs[tag]);
-		tag_mask &= ~(1UL << i);
-		rq = blk_mq_rq_ctx_init(data, tags, tag);
-		rq_list_add_head(data->cached_rqs, rq);
-		nr++;
-	}
-	if (!(data->rq_flags & RQF_SCHED_TAGS))
-		blk_mq_add_active_requests(data->hctx, nr);
 	/* caller already holds a reference, add for remainder */
 	percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
-	data->nr_tags -= nr;
 
 	return rq_list_pop(data->cached_rqs);
 }
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 4d188d05db15..c0a8da1beec9 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -208,6 +208,32 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map,
 	return nr;
 }
 
+static unsigned long sbitmap_find_bits_in_word(struct sbitmap_word *map,
+				    unsigned int depth, unsigned int alloc_hint, bool wrap,
+				    unsigned long *val, int nr_tags, unsigned int map_depth)
+{
+	unsigned long local_val, nr;
+
+	while (1) {
+		local_val = READ_ONCE(map->word);
+		if (local_val == (1UL << (map_depth - 1)) - 1) {
+			if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap))
+				return -1UL;
+			continue;
+		}
+
+		nr = find_first_zero_bit(&local_val, map_depth);
+		if (nr + nr_tags <= map_depth)
+			break;
+
+		if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap))
+			return -1UL;
+	};
+
+	*val = local_val;
+	return nr;
+}
+
 static unsigned int __map_depth_with_shallow(const struct sbitmap *sb,
 					     int index,
 					     unsigned int shallow_depth)
@@ -534,26 +560,22 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
 		unsigned int map_depth = __map_depth(sb, index);
 		unsigned long val;
 
-		sbitmap_deferred_clear(map, 0, 0, 0);
-		val = READ_ONCE(map->word);
-		if (val == (1UL << (map_depth - 1)) - 1)
+		nr = sbitmap_find_bits_in_word(map, 0, 0, 0, &val, nr_tags, map_depth);
+		if (nr == -1UL)
 			goto next;
 
-		nr = find_first_zero_bit(&val, map_depth);
-		if (nr + nr_tags <= map_depth) {
-			atomic_long_t *ptr = (atomic_long_t *) &map->word;
-
-			get_mask = ((1UL << nr_tags) - 1) << nr;
-			while (!atomic_long_try_cmpxchg(ptr, &val,
-							  get_mask | val))
-				;
-			get_mask = (get_mask & ~val) >> nr;
-			if (get_mask) {
-				*offset = nr + (index << sb->shift);
-				update_alloc_hint_after_get(sb, depth, hint,
-							*offset + nr_tags - 1);
-				return get_mask;
-			}
+		atomic_long_t *ptr = (atomic_long_t *) &map->word;
+
+		get_mask = ((1UL << nr_tags) - 1) << nr;
+		while (!atomic_long_try_cmpxchg(ptr, &val,
+						  get_mask | val))
+			;
+		get_mask = (get_mask & ~val) >> nr;
+		if (get_mask) {
+			*offset = nr + (index << sb->shift);
+			update_alloc_hint_after_get(sb, depth, hint,
+						*offset + nr_tags - 1);
+			return get_mask;
 		}
 next:
 		/* Jump to next index. */
-- 
2.34.1
Re: [PATCH v4] block: plug attempts to batch allocate tags multiple times
Posted by Yu Kuai 2 months ago
Hi,

On 2025/10/10 14:35, Xue He wrote:
> When IOs are submitted in a batch through the plug mechanism, retry the
> batch tag allocation until enough tags have been obtained, instead of
> falling back to frequent individual allocations whenever the initial
> batch allocation is insufficient.
> 
> ------------------------------------------------------------
> Perf (overhead of __blk_mq_alloc_requests()):
> base code: 1.31%
> patch:     0.71%
> ------------------------------------------------------------
> 
> ---
> changes since v1:
> - Consolidate multiple batch allocations into a single loop that
>    reaches the requested batch quantity
> 
> changes since v2:
> - Move the call site of the remainder handling
> - Rework when the sbitmap cleanup is performed
> 
> changes since v3:
> - Add the handling operations inside the loop
> - Add the helper sbitmap_find_bits_in_word
> 
> Signed-off-by: hexue <xue01.he@samsung.com>
> ---
>   block/blk-mq.c | 39 ++++++++++++++++++---------------
>   lib/sbitmap.c  | 58 ++++++++++++++++++++++++++++++++++----------------
>   2 files changed, 62 insertions(+), 35 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index ba3a4b77f578..695ccc72e69f 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -458,26 +458,31 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data)
>   	unsigned long tag_mask;
>   	int i, nr = 0;
>   
> -	tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
> -	if (unlikely(!tag_mask))
> -		return NULL;
> +	do {
> +		tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset);
> +		if (unlikely(!tag_mask)) {
> +			if (nr == 0)
> +				return NULL;
> +			break;
> +		}
> +		tags = blk_mq_tags_from_data(data);
> +		for (i = 0; tag_mask; i++) {
> +			if (!(tag_mask & (1UL << i)))
> +				continue;
> +			tag = tag_offset + i;
> +			prefetch(tags->static_rqs[tag]);
> +			tag_mask &= ~(1UL << i);
> +			rq = blk_mq_rq_ctx_init(data, tags, tag);
> +			rq_list_add_head(data->cached_rqs, rq);
> +			data->nr_tags--;
> +			nr++;
> +		}
> +		if (!(data->rq_flags & RQF_SCHED_TAGS))
> +			blk_mq_add_active_requests(data->hctx, nr);
> +	} while (data->nr_tags);
>   
> -	tags = blk_mq_tags_from_data(data);
> -	for (i = 0; tag_mask; i++) {
> -		if (!(tag_mask & (1UL << i)))
> -			continue;
> -		tag = tag_offset + i;
> -		prefetch(tags->static_rqs[tag]);
> -		tag_mask &= ~(1UL << i);
> -		rq = blk_mq_rq_ctx_init(data, tags, tag);
> -		rq_list_add_head(data->cached_rqs, rq);
> -		nr++;
> -	}
> -	if (!(data->rq_flags & RQF_SCHED_TAGS))
> -		blk_mq_add_active_requests(data->hctx, nr);
>   	/* caller already holds a reference, add for remainder */
>   	percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
> -	data->nr_tags -= nr;
>   
>   	return rq_list_pop(data->cached_rqs);
>   }
> diff --git a/lib/sbitmap.c b/lib/sbitmap.c
> index 4d188d05db15..c0a8da1beec9 100644
> --- a/lib/sbitmap.c
> +++ b/lib/sbitmap.c
> @@ -208,6 +208,32 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map,
>   	return nr;
>   }
>   
> +static unsigned long sbitmap_find_bits_in_word(struct sbitmap_word *map,
> +				    unsigned int depth, unsigned int alloc_hint, bool wrap,
> +				    unsigned long *val, int nr_tags, unsigned int map_depth)

The parameters depth, alloc_hint and wrap are not necessary; the only
caller always passes 0 for them.
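
For the only caller, something like the following should be enough
(untested sketch):

static unsigned long sbitmap_find_bits_in_word(struct sbitmap_word *map,
				unsigned long *val, int nr_tags,
				unsigned int map_depth)
{
	...
	if (!sbitmap_deferred_clear(map, 0, 0, 0))
		return -1UL;
	...
}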

> +{
> +	unsigned long local_val, nr;
> +
> +	while (1) {
> +		local_val = READ_ONCE(map->word);
> +		if (local_val == (1UL << (map_depth - 1)) - 1) {
> +			if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap))
> +				return -1UL;
> +			continue;
> +		}
> +
> +		nr = find_first_zero_bit(&local_val, map_depth);
> +		if (nr + nr_tags <= map_depth)
> +			break;

Given that we can now allocate multiple times, it is not necessary to
allocate nr_tags at once, so perhaps it makes sense to skip this check.
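
i.e. only retry when the word has no free bit at all, something like
(untested):

	nr = find_first_zero_bit(&local_val, map_depth);
	if (nr != map_depth)
		break;

	if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap))
		return -1UL;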

> +
> +		if (!sbitmap_deferred_clear(map, depth, alloc_hint, wrap))
> +			return -1UL;
> +	};
> +
> +	*val = local_val;
> +	return nr;
> +}
> +
>   static unsigned int __map_depth_with_shallow(const struct sbitmap *sb,
>   					     int index,
>   					     unsigned int shallow_depth)
> @@ -534,26 +560,22 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
>   		unsigned int map_depth = __map_depth(sb, index);
>   		unsigned long val;
>   
> -		sbitmap_deferred_clear(map, 0, 0, 0);
> -		val = READ_ONCE(map->word);
> -		if (val == (1UL << (map_depth - 1)) - 1)
> +		nr = sbitmap_find_bits_in_word(map, 0, 0, 0, &val, nr_tags, map_depth);
> +		if (nr == -1UL)
>   			goto next;
>   
> -		nr = find_first_zero_bit(&val, map_depth);
> -		if (nr + nr_tags <= map_depth) {
> -			atomic_long_t *ptr = (atomic_long_t *) &map->word;
> -
> -			get_mask = ((1UL << nr_tags) - 1) << nr;
> -			while (!atomic_long_try_cmpxchg(ptr, &val,
> -							  get_mask | val))
> -				;
> -			get_mask = (get_mask & ~val) >> nr;
> -			if (get_mask) {
> -				*offset = nr + (index << sb->shift);
> -				update_alloc_hint_after_get(sb, depth, hint,
> -							*offset + nr_tags - 1);
> -				return get_mask;
> -			}
> +		atomic_long_t *ptr = (atomic_long_t *) &map->word;
> +
> +		get_mask = ((1UL << nr_tags) - 1) << nr;
> +		while (!atomic_long_try_cmpxchg(ptr, &val,
> +						  get_mask | val))
> +			;
> +		get_mask = (get_mask & ~val) >> nr;
> +		if (get_mask) {
> +			*offset = nr + (index << sb->shift);
> +			update_alloc_hint_after_get(sb, depth, hint,
> +						*offset + nr_tags - 1);
> +			return get_mask;

I feel it's better to fold the above into the helper and return
get_mask directly.

BTW, the sbitmap and blk-mq changes are not closely related, so you can
split them into separate patches.

Thanks,
Kuai

>   		}
>   next:
>   		/* Jump to next index. */
> 

Re: [PATCH v4] block: plug attempts to batch allocate tags multiple times
Posted by Xue He 2 months ago
Hi Yu Kuai,

On 2025/10/14 15:28, Yu Kuai wrote:
>On 2025/10/10 14:35, Xue He wrote:
>>   static unsigned int __map_depth_with_shallow(const struct sbitmap *sb,
>>   					     int index,
>>   					     unsigned int shallow_depth)
>> @@ -534,26 +560,22 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
>>   		unsigned int map_depth = __map_depth(sb, index);
>>   		unsigned long val;
>>   
>> -		sbitmap_deferred_clear(map, 0, 0, 0);
>> -		val = READ_ONCE(map->word);
>> -		if (val == (1UL << (map_depth - 1)) - 1)
>> +		nr = sbitmap_find_bits_in_word(map, 0, 0, 0, &val, nr_tags, map_depth);
>> +		if (nr == -1UL)
>>   			goto next;
>>   
>> -		nr = find_first_zero_bit(&val, map_depth);
>> -		if (nr + nr_tags <= map_depth) {
>> -			atomic_long_t *ptr = (atomic_long_t *) &map->word;
>> -
>> -			get_mask = ((1UL << nr_tags) - 1) << nr;
>> -			while (!atomic_long_try_cmpxchg(ptr, &val,
>> -							  get_mask | val))
>> -				;
>> -			get_mask = (get_mask & ~val) >> nr;
>> -			if (get_mask) {
>> -				*offset = nr + (index << sb->shift);
>> -				update_alloc_hint_after_get(sb, depth, hint,
>> -							*offset + nr_tags - 1);
>> -				return get_mask;
>> -			}
>> +		atomic_long_t *ptr = (atomic_long_t *) &map->word;
>> +
>> +		get_mask = ((1UL << nr_tags) - 1) << nr;
>> +		while (!atomic_long_try_cmpxchg(ptr, &val,
>> +						  get_mask | val))
>> +			;
>> +		get_mask = (get_mask & ~val) >> nr;
>> +		if (get_mask) {
>> +			*offset = nr + (index << sb->shift);
>> +			update_alloc_hint_after_get(sb, depth, hint,
>> +						*offset + nr_tags - 1);
>> +			return get_mask;
>
>I feel it's better to fold in above into the helper and return get_mask
>directly.
Sorry, I'm not sure my understanding is correct. Do you mean modifying
it in a way similar to the following? If so, I will respin the patch
accordingly.

static unsigned long sbitmap_find_bits_in_word(unsigned long index, 
					struct sbitmap *sb, unsigned int *offset,
					unsigned int hint, unsigned int depth, int nr_tags)
{
	unsigned long val, nr, get_mask;
	struct sbitmap_word *map = &sb->map[index];
	unsigned int map_depth = __map_depth(sb, index);

	while (1) {
		val = READ_ONCE(map->word);
		if (val == (1UL << (map_depth - 1)) - 1) {
			if (!sbitmap_deferred_clear(map, 0, 0, 0))
				return 0;
			continue;
		}

		nr = find_first_zero_bit(&val, map_depth);
		if (nr != map_depth)
			break;

		if (!sbitmap_deferred_clear(map, 0, 0, 0))
			return 0;
	}

	atomic_long_t *ptr = (atomic_long_t *) &map->word;
	
	get_mask = ((1UL << nr_tags) - 1) << nr;
	while (!atomic_long_try_cmpxchg(ptr, &val,
					  get_mask | val))
		;
	get_mask = (get_mask & ~val) >> nr;
	if (get_mask) {
		*offset = nr + (index << sb->shift);
		update_alloc_hint_after_get(sb, depth, hint,
					*offset + nr_tags - 1);
	}
	return get_mask;
}

and the caller would then look like:

unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags,
						unsigned int *offset)
{
	struct sbitmap *sb = &sbq->sb;
	unsigned int hint, depth;
	unsigned long get_mask, index;
	int i;

	if (unlikely(sb->round_robin))
		return 0;

	depth = READ_ONCE(sb->depth);
	hint = update_alloc_hint_before_get(sb, depth);

	index = SB_NR_TO_INDEX(sb, hint);

	for (i = 0; i < sb->map_nr; i++) {
		get_mask = sbitmap_find_bits_in_word(index, sb, offset, hint,
							depth, nr_tags);
		if (get_mask)
			return get_mask;

		/* Jump to next index. */
		if (++index >= sb->map_nr)
			index = 0;
	}

	return 0;
}

Am I missing something? 

Thanks,
Xue
>
>BTW, the sbitmap and blk-mq changes are not quite related, and you can
>split them into seperate patches.
>
>Thanks,
>Kuai
>
>>   		}
>>   next:
>>   		/* Jump to next index. */
>>