From: Yu Kuai <yukuai3@huawei.com>
Introduce struct sched_dispatch_ctx, and split the helper into
elevator_dispatch_one_request() and elevator_finish_dispatch(). Also
add comments about the non-error return value.
Make code cleaner, and make it easier to add a new branch to dispatch
a batch of requests at a time in the next patch.
Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
block/blk-mq-sched.c | 196 ++++++++++++++++++++++++++-----------------
1 file changed, 119 insertions(+), 77 deletions(-)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 82c4f4eef9ed..f18aecf710ad 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -74,91 +74,100 @@ static bool blk_mq_dispatch_hctx_list(struct list_head *rq_list)
#define BLK_MQ_BUDGET_DELAY 3 /* ms units */
-/*
- * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
- * its queue by itself in its completion handler, so we don't need to
- * restart queue if .get_budget() fails to get the budget.
- *
- * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
- * be run again. This is necessary to avoid starving flushes.
- */
-static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
-{
- struct request_queue *q = hctx->queue;
- struct elevator_queue *e = q->elevator;
- bool multi_hctxs = false, run_queue = false;
- bool dispatched = false, busy = false;
- unsigned int max_dispatch;
- LIST_HEAD(rq_list);
- int count = 0;
+struct sched_dispatch_ctx {
+ struct blk_mq_hw_ctx *hctx;
+ struct elevator_queue *e;
+ struct request_queue *q;
- if (hctx->dispatch_busy)
- max_dispatch = 1;
- else
- max_dispatch = hctx->queue->nr_requests;
+ struct list_head rq_list;
+ int count;
- do {
- bool sq_sched = blk_queue_sq_sched(q);
- struct request *rq;
- int budget_token;
+ bool multi_hctxs;
+ bool run_queue;
+ bool busy;
+};
- if (e->type->ops.has_work && !e->type->ops.has_work(hctx))
- break;
+static bool elevator_can_dispatch(struct sched_dispatch_ctx *ctx)
+{
+ if (ctx->e->type->ops.has_work &&
+ !ctx->e->type->ops.has_work(ctx->hctx))
+ return false;
- if (!list_empty_careful(&hctx->dispatch)) {
- busy = true;
- break;
- }
+ if (!list_empty_careful(&ctx->hctx->dispatch)) {
+ ctx->busy = true;
+ return false;
+ }
- budget_token = blk_mq_get_dispatch_budget(q);
- if (budget_token < 0)
- break;
+ return true;
+}
- if (sq_sched)
- spin_lock_irq(&e->lock);
- rq = e->type->ops.dispatch_request(hctx);
- if (sq_sched)
- spin_unlock_irq(&e->lock);
+static bool elevator_dispatch_one_request(struct sched_dispatch_ctx *ctx)
+{
+ bool sq_sched = blk_queue_sq_sched(ctx->q);
+ struct request *rq;
+ int budget_token;
- if (!rq) {
- blk_mq_put_dispatch_budget(q, budget_token);
- /*
- * We're releasing without dispatching. Holding the
- * budget could have blocked any "hctx"s with the
- * same queue and if we didn't dispatch then there's
- * no guarantee anyone will kick the queue. Kick it
- * ourselves.
- */
- run_queue = true;
- break;
- }
+ if (!elevator_can_dispatch(ctx))
+ return false;
- blk_mq_set_rq_budget_token(rq, budget_token);
+ budget_token = blk_mq_get_dispatch_budget(ctx->q);
+ if (budget_token < 0)
+ return false;
- /*
- * Now this rq owns the budget which has to be released
- * if this rq won't be queued to driver via .queue_rq()
- * in blk_mq_dispatch_rq_list().
- */
- list_add_tail(&rq->queuelist, &rq_list);
- count++;
- if (rq->mq_hctx != hctx)
- multi_hctxs = true;
+ if (sq_sched)
+ spin_lock_irq(&ctx->e->lock);
+ rq = ctx->e->type->ops.dispatch_request(ctx->hctx);
+ if (sq_sched)
+ spin_unlock_irq(&ctx->e->lock);
+ if (!rq) {
+ blk_mq_put_dispatch_budget(ctx->q, budget_token);
/*
- * If we cannot get tag for the request, stop dequeueing
- * requests from the IO scheduler. We are unlikely to be able
- * to submit them anyway and it creates false impression for
- * scheduling heuristics that the device can take more IO.
+ * We're releasing without dispatching. Holding the
+ * budget could have blocked any "hctx"s with the
+ * same queue and if we didn't dispatch then there's
+ * no guarantee anyone will kick the queue. Kick it
+ * ourselves.
*/
- if (!blk_mq_get_driver_tag(rq))
- break;
- } while (count < max_dispatch);
+ ctx->run_queue = true;
+ return false;
+ }
- if (!count) {
- if (run_queue)
- blk_mq_delay_run_hw_queues(q, BLK_MQ_BUDGET_DELAY);
- } else if (multi_hctxs) {
+ blk_mq_set_rq_budget_token(rq, budget_token);
+
+ /*
+ * Now this rq owns the budget which has to be released
+ * if this rq won't be queued to driver via .queue_rq()
+ * in blk_mq_dispatch_rq_list().
+ */
+ list_add_tail(&rq->queuelist, &ctx->rq_list);
+ ctx->count++;
+ if (rq->mq_hctx != ctx->hctx)
+ ctx->multi_hctxs = true;
+
+ /*
+ * If we cannot get tag for the request, stop dequeueing
+ * requests from the IO scheduler. We are unlikely to be able
+ * to submit them anyway and it creates false impression for
+ * scheduling heuristics that the device can take more IO.
+ */
+ return blk_mq_get_driver_tag(rq);
+}
+
+/*
+ * Returns -EAGAIN if hctx->dispatch was found non-empty and run_work has to
+ * be run again. This is necessary to avoid starving flushes.
+ * Return 0 if no request is dispatched.
+ * Return 1 if at least one request is dispatched.
+ */
+static int elevator_finish_dispatch(struct sched_dispatch_ctx *ctx)
+{
+ bool dispatched = false;
+
+ if (!ctx->count) {
+ if (ctx->run_queue)
+ blk_mq_delay_run_hw_queues(ctx->q, BLK_MQ_BUDGET_DELAY);
+ } else if (ctx->multi_hctxs) {
/*
* Requests from different hctx may be dequeued from some
* schedulers, such as bfq and deadline.
@@ -166,19 +175,52 @@ static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
* Sort the requests in the list according to their hctx,
* dispatch batching requests from same hctx at a time.
*/
- list_sort(NULL, &rq_list, sched_rq_cmp);
+ list_sort(NULL, &ctx->rq_list, sched_rq_cmp);
do {
- dispatched |= blk_mq_dispatch_hctx_list(&rq_list);
- } while (!list_empty(&rq_list));
+ dispatched |= blk_mq_dispatch_hctx_list(&ctx->rq_list);
+ } while (!list_empty(&ctx->rq_list));
} else {
- dispatched = blk_mq_dispatch_rq_list(hctx, &rq_list, false);
+ dispatched = blk_mq_dispatch_rq_list(ctx->hctx, &ctx->rq_list,
+ false);
}
- if (busy)
+ if (ctx->busy)
return -EAGAIN;
+
return !!dispatched;
}
+/*
+ * Only SCSI implements .get_budget and .put_budget, and SCSI restarts
+ * its queue by itself in its completion handler, so we don't need to
+ * restart queue if .get_budget() fails to get the budget.
+ *
+ * See elevator_finish_dispatch() for return values.
+ */
+static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
+{
+ unsigned int max_dispatch;
+ struct sched_dispatch_ctx ctx = {
+ .hctx = hctx,
+ .q = hctx->queue,
+ .e = hctx->queue->elevator,
+ };
+
+ INIT_LIST_HEAD(&ctx.rq_list);
+
+ if (hctx->dispatch_busy)
+ max_dispatch = 1;
+ else
+ max_dispatch = hctx->queue->nr_requests;
+
+ do {
+ if (!elevator_dispatch_one_request(&ctx))
+ break;
+ } while (ctx.count < max_dispatch);
+
+ return elevator_finish_dispatch(&ctx);
+}
+
static int blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
{
unsigned long end = jiffies + HZ;
--
2.39.2
On 7/30/25 1:22 AM, Yu Kuai wrote:
> Introduce struct sched_dispatch_ctx, and split the helper into
> elevator_dispatch_one_request() and elevator_finish_dispatch(). Also
> and comments about the non-error return value.
and -> add
> +struct sched_dispatch_ctx {
> + struct blk_mq_hw_ctx *hctx;
> + struct elevator_queue *e;
> + struct request_queue *q;
'e' is always equal to q->elevator so I'm not sure whether it's worth
having the member 'e'?
> +static bool elevator_can_dispatch(struct sched_dispatch_ctx *ctx)
> +{
> + if (ctx->e->type->ops.has_work &&
> + !ctx->e->type->ops.has_work(ctx->hctx))
> + return false;
>
> - if (!list_empty_careful(&hctx->dispatch)) {
> - busy = true;
> - break;
> - }
> + if (!list_empty_careful(&ctx->hctx->dispatch)) {
> + ctx->busy = true;
> + return false;
> + }
>
> - budget_token = blk_mq_get_dispatch_budget(q);
> - if (budget_token < 0)
> - break;
> + return true;
> +}
Shouldn't all function names in this file start with the blk_mq_ prefix?
Additionally, please rename elevator_can_dispatch() into
elevator_should_dispatch(). I think the latter name better reflects the
purpose of this function.
> + if (sq_sched)
> + spin_lock_irq(&ctx->e->lock);
> + rq = ctx->e->type->ops.dispatch_request(ctx->hctx);
> + if (sq_sched)
> + spin_unlock_irq(&ctx->e->lock);
Same comment here as on patch 1/5: code like the above makes it
harder than necessary for static analyzers to verify this code.
>
> + if (!rq) {
> + blk_mq_put_dispatch_budget(ctx->q, budget_token);
> /*
> - * If we cannot get tag for the request, stop dequeueing
> - * requests from the IO scheduler. We are unlikely to be able
> - * to submit them anyway and it creates false impression for
> - * scheduling heuristics that the device can take more IO.
> + * We're releasing without dispatching. Holding the
> + * budget could have blocked any "hctx"s with the
> + * same queue and if we didn't dispatch then there's
> + * no guarantee anyone will kick the queue. Kick it
> + * ourselves.
> */
Please keep the original comment. To me the new comment seems less clear
than the existing comment.
> +static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
> +{
> + unsigned int max_dispatch;
> + struct sched_dispatch_ctx ctx = {
> + .hctx = hctx,
> + .q = hctx->queue,
> + .e = hctx->queue->elevator,
> + };
> +
> + INIT_LIST_HEAD(&ctx.rq_list);
Please remove the INIT_LIST_HEAD() invocation and add the following in
the ctx declaration:
.rq_list = LIST_HEAD_INIT(ctx.rq_list),
This is a common pattern in kernel code. The following grep command
yields about 200 results:
$ git grep -nH '= LIST_HEAD_INIT.*\.'
Otherwise this patch looks good to me.
Thanks,
Bart.
Hi,
在 2025/07/31 2:32, Bart Van Assche 写道:
> On 7/30/25 1:22 AM, Yu Kuai wrote:
>> Introduce struct sched_dispatch_ctx, and split the helper into
>> elevator_dispatch_one_request() and elevator_finish_dispatch(). Also
>> and comments about the non-error return value.
>
> and -> add
>
>> +struct sched_dispatch_ctx {
>> + struct blk_mq_hw_ctx *hctx;
>> + struct elevator_queue *e;
>> + struct request_queue *q;
>
> 'e' is always equal to q->elevator so I'm not sure whether it's worth to
> have the member 'e'?
>
>> +static bool elevator_can_dispatch(struct sched_dispatch_ctx *ctx)
>> +{
>> + if (ctx->e->type->ops.has_work &&
>> + !ctx->e->type->ops.has_work(ctx->hctx))
>> + return false;
>> - if (!list_empty_careful(&hctx->dispatch)) {
>> - busy = true;
>> - break;
>> - }
>> + if (!list_empty_careful(&ctx->hctx->dispatch)) {
>> + ctx->busy = true;
>> + return false;
>> + }
>> - budget_token = blk_mq_get_dispatch_budget(q);
>> - if (budget_token < 0)
>> - break;
>> + return true;
>> +}
>
> Shouldn't all function names in this file start with the blk_mq_ prefix?
Ok
>
> Additionally, please rename elevator_can_dispatch() into
> elevator_should_dispatch(). I think the latter name better reflects the
> purpose of this function.
Sounds good.
>
>> + if (sq_sched)
>> + spin_lock_irq(&ctx->e->lock);
>> + rq = ctx->e->type->ops.dispatch_request(ctx->hctx);
>> + if (sq_sched)
>> + spin_unlock_irq(&ctx->e->lock);
>
> Same comment here as on patch 1/5: code like the above makes it
> harder than necessary for static analyzers to verify this code.
Ok
>
>> + if (!rq) {
>> + blk_mq_put_dispatch_budget(ctx->q, budget_token);
>> /*
>> - * If we cannot get tag for the request, stop dequeueing
>> - * requests from the IO scheduler. We are unlikely to be able
>> - * to submit them anyway and it creates false impression for
>> - * scheduling heuristics that the device can take more IO.
>> + * We're releasing without dispatching. Holding the
>> + * budget could have blocked any "hctx"s with the
>> + * same queue and if we didn't dispatch then there's
>> + * no guarantee anyone will kick the queue. Kick it
>> + * ourselves.
>> */
>
> Please keep the original comment. To me the new comment seems less clear
> than the existing comment.
Please note that I didn't change the comment here, above comment is for
setting the run_queue. The original comment for blk_mq_get_driver_tag()
is still there.
>
>> +static int __blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx)
>> +{
>> + unsigned int max_dispatch;
>> + struct sched_dispatch_ctx ctx = {
>> + .hctx = hctx,
>> + .q = hctx->queue,
>> + .e = hctx->queue->elevator,
>> + };
>> +
>> + INIT_LIST_HEAD(&ctx.rq_list);
>
> Please remove the INIT_LIST_HEAD() invocation and add the following in
> the ctx declaration:
>
> .rq_list = LIST_HEAD_INIT(ctx.rq_list),
>
> This is a common pattern in kernel code. The following grep command
> yields about 200 results:
>
> $ git grep -nH '= LIST_HEAD_INIT.*\.'
Ok
>
> Otherwise this patch looks good to me.
>
Thanks for the review!
Kuai
> Thanks,
>
> Bart.
> .
>
© 2016 - 2026 Red Hat, Inc.