[v1] blk-mq: introduce new queue attribute async_depth

[PATCH 3/7] blk-mq: add a new queue sysfs attribute async_depth

Posted by Yu Kuai 4 months, 1 week ago

From: Yu Kuai <yukuai3@huawei.com>

Add a new field async_depth to request_queue, along with the related
APIs. It is currently unused; the following patches will convert the
elevators to use it instead of their internal async_depth.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-core.c       |  1 +
 block/blk-mq.c         |  4 ++++
 block/blk-sysfs.c      | 47 ++++++++++++++++++++++++++++++++++++++++++
 block/elevator.c       |  1 +
 include/linux/blkdev.h |  1 +
 5 files changed, 54 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index dd39ff651095..76df70cfc103 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -463,6 +463,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
 	fs_reclaim_release(GFP_KERNEL);
 
 	q->nr_requests = BLKDEV_DEFAULT_RQ;
+	q->async_depth = BLKDEV_DEFAULT_RQ;
 
 	return q;
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 09f579414161..260e54fa48f0 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -529,6 +529,8 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
 			data->rq_flags |= RQF_USE_SCHED;
 			if (ops->limit_depth)
 				ops->limit_depth(data->cmd_flags, data);
+			else if (!blk_mq_sched_sync_request(data->cmd_flags))
+				data->shallow_depth = q->async_depth;
 		}
 	} else {
 		blk_mq_tag_busy(data->hctx);
@@ -4605,6 +4607,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	spin_lock_init(&q->requeue_lock);
 
 	q->nr_requests = set->queue_depth;
+	q->async_depth = set->queue_depth;
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 	blk_mq_map_swqueue(q);
@@ -4972,6 +4975,7 @@ struct elevator_tags *blk_mq_update_nr_requests(struct request_queue *q,
 	}
 
 	q->nr_requests = nr;
+	q->async_depth = nr;
 	if (q->elevator && q->elevator->type->ops.depth_updated)
 		q->elevator->type->ops.depth_updated(q);
 
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 76c47fe9b8d6..9553cc022c7e 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -127,6 +127,51 @@ queue_requests_store(struct gendisk *disk, const char *page, size_t count)
 	return ret;
 }
 
+static ssize_t queue_async_depth_show(struct gendisk *disk, char *page)
+{
+	ssize_t ret;
+
+	mutex_lock(&disk->queue->elevator_lock);
+	ret = queue_var_show(disk->queue->async_depth, page);
+	mutex_unlock(&disk->queue->elevator_lock);
+	return ret;
+}
+
+static ssize_t
+queue_async_depth_store(struct gendisk *disk, const char *page, size_t count)
+{
+	struct request_queue *q = disk->queue;
+	unsigned int memflags;
+	unsigned long nr;
+	int ret;
+
+	if (!queue_is_mq(q))
+		return -EINVAL;
+
+	ret = queue_var_store(&nr, page, count);
+	if (ret < 0)
+		return ret;
+
+	if (nr == 0)
+		return -EINVAL;
+
+	memflags = blk_mq_freeze_queue(q);
+	mutex_lock(&q->elevator_lock);
+
+	if (q->elevator) {
+		q->async_depth = min(q->nr_requests, nr);
+		if (q->elevator->type->ops.depth_updated)
+			q->elevator->type->ops.depth_updated(q);
+	} else {
+		ret = -EINVAL;
+	}
+
+	mutex_unlock(&q->elevator_lock);
+	blk_mq_unfreeze_queue(q, memflags);
+
+	return ret;
+}
+
 static ssize_t queue_ra_show(struct gendisk *disk, char *page)
 {
 	ssize_t ret;
@@ -542,6 +587,7 @@ static struct queue_sysfs_entry _prefix##_entry = {	\
 }
 
 QUEUE_RW_ENTRY(queue_requests, "nr_requests");
+QUEUE_RW_ENTRY(queue_async_depth, "async_depth");
 QUEUE_RW_ENTRY(queue_ra, "read_ahead_kb");
 QUEUE_LIM_RW_ENTRY(queue_max_sectors, "max_sectors_kb");
 QUEUE_LIM_RO_ENTRY(queue_max_hw_sectors, "max_hw_sectors_kb");
@@ -764,6 +810,7 @@ static struct attribute *blk_mq_queue_attrs[] = {
 	 */
 	&elv_iosched_entry.attr,
 	&queue_requests_entry.attr,
+	&queue_async_depth_entry.attr,
 #ifdef CONFIG_BLK_WBT
 	&queue_wb_lat_entry.attr,
 #endif
diff --git a/block/elevator.c b/block/elevator.c
index e2ebfbf107b3..8f510cb881ba 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -601,6 +601,7 @@ static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
 		blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
 		q->elevator = NULL;
 		q->nr_requests = q->tag_set->queue_depth;
+		q->async_depth = q->tag_set->queue_depth;
 	}
 	blk_add_trace_msg(q, "elv switch: %s", ctx->name);
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 02c006fb94c5..1d470ac71c64 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -542,6 +542,7 @@ struct request_queue {
 	 * queue settings
 	 */
 	unsigned int		nr_requests;	/* Max # of requests */
+	unsigned int		async_depth;	/* Max # of async requests */
 
 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
 	struct blk_crypto_profile *crypto_profile;
-- 
2.39.2

Re: [PATCH 3/7] blk-mq: add a new queue sysfs attribute async_depth

Posted by Nilay Shroff 4 months, 1 week ago


On 9/30/25 12:41 PM, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Add a new field async_depth to request_queue and related APIs, this is
> currently not used, following patches will convert elevators to use
> this instead of internal async_depth.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  block/blk-core.c       |  1 +
>  block/blk-mq.c         |  4 ++++
>  block/blk-sysfs.c      | 47 ++++++++++++++++++++++++++++++++++++++++++
>  block/elevator.c       |  1 +
>  include/linux/blkdev.h |  1 +
>  5 files changed, 54 insertions(+)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index dd39ff651095..76df70cfc103 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -463,6 +463,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
>  	fs_reclaim_release(GFP_KERNEL);
>  
>  	q->nr_requests = BLKDEV_DEFAULT_RQ;
> +	q->async_depth = BLKDEV_DEFAULT_RQ;
>  
>  	return q;
>  
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index 09f579414161..260e54fa48f0 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -529,6 +529,8 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>  			data->rq_flags |= RQF_USE_SCHED;
>  			if (ops->limit_depth)
>  				ops->limit_depth(data->cmd_flags, data);
> +			else if (!blk_mq_sched_sync_request(data->cmd_flags))
> +				data->shallow_depth = q->async_depth;
>  		}

In the subsequent patches, I saw that ->limit_depth is still used for the
BFQ scheduler. Given that, it seems more consistent to also retain ->limit_depth
for the mq-deadline and Kyber schedulers, and set data->shallow_depth within their
respective ->limit_depth methods. If we take this approach, the additional 
blk_mq_sched_sync_request() check above becomes unnecessary.

So IMO:
- Keep ->limit_depth for all schedulers (bfq, mq-deadline, kyber).
- Remove the extra blk_mq_sched_sync_request() check from the core code.

Thanks,
--Nilay

Re: [PATCH 3/7] blk-mq: add a new queue sysfs attribute async_depth

Posted by Yu Kuai 4 months, 1 week ago

Hi,

在 2025/10/2 23:10, Nilay Shroff 写道:
>
> On 9/30/25 12:41 PM, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Add a new field async_depth to request_queue and related APIs, this is
>> currently not used, following patches will convert elevators to use
>> this instead of internal async_depth.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/blk-core.c       |  1 +
>>   block/blk-mq.c         |  4 ++++
>>   block/blk-sysfs.c      | 47 ++++++++++++++++++++++++++++++++++++++++++
>>   block/elevator.c       |  1 +
>>   include/linux/blkdev.h |  1 +
>>   5 files changed, 54 insertions(+)
>>
>> diff --git a/block/blk-core.c b/block/blk-core.c
>> index dd39ff651095..76df70cfc103 100644
>> --- a/block/blk-core.c
>> +++ b/block/blk-core.c
>> @@ -463,6 +463,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
>>   	fs_reclaim_release(GFP_KERNEL);
>>   
>>   	q->nr_requests = BLKDEV_DEFAULT_RQ;
>> +	q->async_depth = BLKDEV_DEFAULT_RQ;
>>   
>>   	return q;
>>   
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index 09f579414161..260e54fa48f0 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -529,6 +529,8 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>>   			data->rq_flags |= RQF_USE_SCHED;
>>   			if (ops->limit_depth)
>>   				ops->limit_depth(data->cmd_flags, data);
>> +			else if (!blk_mq_sched_sync_request(data->cmd_flags))
>> +				data->shallow_depth = q->async_depth;
>>   		}
> In the subsequent patches, I saw that ->limit_depth is still used for the
> BFQ scheduler. Given that, it seems more consistent to also retain ->limit_depth
> for the mq-deadline and Kyber schedulers, and set data->shallow_depth within their
> respective ->limit_depth methods. If we take this approach, the additional
> blk_mq_sched_sync_request() check above becomes unnecessary.
>
> So IMO:
> - Keep ->limit_depth for all schedulers (bfq, mq-deadline, kyber).
> - Remove the extra blk_mq_sched_sync_request() check from the core code.

I was thinking of saving a function call for deadline and kyber; however, I don't
have a preference here and I can do this in the next version.

Thanks,
Kuai

> Thanks,
> --Nilay
>

Re: [PATCH 3/7] blk-mq: add a new queue sysfs attribute async_depth

Posted by Yu Kuai 4 months ago

Hi,

在 2025/10/06 9:57, Yu Kuai 写道:
> Hi,
> 
> 在 2025/10/2 23:10, Nilay Shroff 写道:
>>
>> On 9/30/25 12:41 PM, Yu Kuai wrote:
>>> From: Yu Kuai <yukuai3@huawei.com>
>>>
>>> Add a new field async_depth to request_queue and related APIs, this is
>>> currently not used, following patches will convert elevators to use
>>> this instead of internal async_depth.
>>>
>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>> ---
>>>   block/blk-core.c       |  1 +
>>>   block/blk-mq.c         |  4 ++++
>>>   block/blk-sysfs.c      | 47 ++++++++++++++++++++++++++++++++++++++++++
>>>   block/elevator.c       |  1 +
>>>   include/linux/blkdev.h |  1 +
>>>   5 files changed, 54 insertions(+)
>>>
>>> diff --git a/block/blk-core.c b/block/blk-core.c
>>> index dd39ff651095..76df70cfc103 100644
>>> --- a/block/blk-core.c
>>> +++ b/block/blk-core.c
>>> @@ -463,6 +463,7 @@ struct request_queue *blk_alloc_queue(struct 
>>> queue_limits *lim, int node_id)
>>>       fs_reclaim_release(GFP_KERNEL);
>>>       q->nr_requests = BLKDEV_DEFAULT_RQ;
>>> +    q->async_depth = BLKDEV_DEFAULT_RQ;
>>>       return q;
>>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>>> index 09f579414161..260e54fa48f0 100644
>>> --- a/block/blk-mq.c
>>> +++ b/block/blk-mq.c
>>> @@ -529,6 +529,8 @@ static struct request 
>>> *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>>>               data->rq_flags |= RQF_USE_SCHED;
>>>               if (ops->limit_depth)
>>>                   ops->limit_depth(data->cmd_flags, data);
>>> +            else if (!blk_mq_sched_sync_request(data->cmd_flags))
>>> +                data->shallow_depth = q->async_depth;
>>>           }
>> In the subsequent patches, I saw that ->limit_depth is still used for the
>> BFQ scheduler. Given that, it seems more consistent to also retain 
>> ->limit_depth
>> for the mq-deadline and Kyber schedulers, and set data->shallow_depth 
>> within their
>> respective ->limit_depth methods. If we take this approach, the 
>> additional
>> blk_mq_sched_sync_request() check above becomes unnecessary.
>>
>> So IMO:
>> - Keep ->limit_depth for all schedulers (bfq, mq-deadline, kyber).
>> - Remove the extra blk_mq_sched_sync_request() check from the core code.
> 
> I was thinking to save a function call for deadline and kyber, however, 
> I don't
> have preference here and I can do this in the next version.

How about the following? I feel this is better while cooking the new
version, considering that only bfq has special handling for async requests.

static void blk_mq_sched_limit_async_depth(struct blk_mq_alloc_data *data)
{
	if (blk_mq_sched_sync_request(data->cmd_flags))
		return;

	data->shallow_depth = q->async_depth;
	if (ops->limit_async_depth)
		ops->limit_async_depth(data);
}

Thanks,
Kuai

> 
> Thanks,
> Kuai
> 
>> Thanks,
>> --Nilay
>>
> .
>

Re: [PATCH 3/7] blk-mq: add a new queue sysfs attribute async_depth

Posted by Yu Kuai 4 months ago

Hi,

在 2025/10/09 8:48, Yu Kuai 写道:
> Hi,
> 
> 在 2025/10/06 9:57, Yu Kuai 写道:
>> Hi,
>>
>> 在 2025/10/2 23:10, Nilay Shroff 写道:
>>>
>>> On 9/30/25 12:41 PM, Yu Kuai wrote:
>>>> From: Yu Kuai <yukuai3@huawei.com>
>>>>
>>>> Add a new field async_depth to request_queue and related APIs, this is
>>>> currently not used, following patches will convert elevators to use
>>>> this instead of internal async_depth.
>>>>
>>>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>>>> ---
>>>>   block/blk-core.c       |  1 +
>>>>   block/blk-mq.c         |  4 ++++
>>>>   block/blk-sysfs.c      | 47 
>>>> ++++++++++++++++++++++++++++++++++++++++++
>>>>   block/elevator.c       |  1 +
>>>>   include/linux/blkdev.h |  1 +
>>>>   5 files changed, 54 insertions(+)
>>>>
>>>> diff --git a/block/blk-core.c b/block/blk-core.c
>>>> index dd39ff651095..76df70cfc103 100644
>>>> --- a/block/blk-core.c
>>>> +++ b/block/blk-core.c
>>>> @@ -463,6 +463,7 @@ struct request_queue *blk_alloc_queue(struct 
>>>> queue_limits *lim, int node_id)
>>>>       fs_reclaim_release(GFP_KERNEL);
>>>>       q->nr_requests = BLKDEV_DEFAULT_RQ;
>>>> +    q->async_depth = BLKDEV_DEFAULT_RQ;
>>>>       return q;
>>>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>>>> index 09f579414161..260e54fa48f0 100644
>>>> --- a/block/blk-mq.c
>>>> +++ b/block/blk-mq.c
>>>> @@ -529,6 +529,8 @@ static struct request 
>>>> *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
>>>>               data->rq_flags |= RQF_USE_SCHED;
>>>>               if (ops->limit_depth)
>>>>                   ops->limit_depth(data->cmd_flags, data);
>>>> +            else if (!blk_mq_sched_sync_request(data->cmd_flags))
>>>> +                data->shallow_depth = q->async_depth;
>>>>           }
>>> In the subsequent patches, I saw that ->limit_depth is still used for 
>>> the
>>> BFQ scheduler. Given that, it seems more consistent to also retain 
>>> ->limit_depth
>>> for the mq-deadline and Kyber schedulers, and set data->shallow_depth 
>>> within their
>>> respective ->limit_depth methods. If we take this approach, the 
>>> additional
>>> blk_mq_sched_sync_request() check above becomes unnecessary.
>>>
>>> So IMO:
>>> - Keep ->limit_depth for all schedulers (bfq, mq-deadline, kyber).
>>> - Remove the extra blk_mq_sched_sync_request() check from the core code.
>>
>> I was thinking to save a function call for deadline and kyber, 
>> however, I don't
>> have preference here and I can do this in the next version.
> 
> How about the following? I feel this is better while cooking the new
> version, considering that only bfq has special handling for async requests.
> 
> static void blk_mq_sched_limit_async_depth(struct blk_mq_alloc_data *data)
> {
>      if (blk_mq_sched_sync_request(data->cmd_flags))
>          return;
> 
>      data->shallow_depth = q->async_depth;
>      if (ops->limit_async_depth)
>          ops->limit_async_depth(data);
> }
> 

I just realized I forgot that bfq can limit sync requests as well, due to
the bfq cgroup policy, so this is not good.

Please ignore this :)

Thanks,
Kuai

> Thanks,
> Kuai
> 
>>
>> Thanks,
>> Kuai
>>
>>> Thanks,
>>> --Nilay
>>>
>> .
>>
> 
> .
>

[PATCH 1/7] block: convert nr_requests to unsigned int
[PATCH 2/7] blk-mq-sched: unify elevators checking for async requests
[PATCH 3/7] blk-mq: add a new queue sysfs attribute async_depth
[PATCH 4/7] kyber: convert to use request_queue->async_depth
[PATCH 5/7] mq-deadline: convert to use request_queue->async_depth
[PATCH 6/7] block, bfq: convert to use request_queue->async_depth
[PATCH 7/7] blk-mq: add documentation for new queue attribute async_depth