blk-rq-qos: fix possible deadlock

[PATCH 1/4] blk-mq-debugfs: warn about possible deadlock

Posted by Yu Kuai 3 months, 4 weeks ago

Creating new debugfs entries can trigger fs reclaim, so this must not be
done while the queue is frozen. Likewise, other locks that can be held
while the queue is frozen must not be held either.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-debugfs.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 4896525b1c05..66864ed0b77f 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -608,9 +608,23 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
 	{},
 };
 
-static void debugfs_create_files(struct dentry *parent, void *data,
+static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
+				 void *data,
 				 const struct blk_mq_debugfs_attr *attr)
 {
+	/*
+	 * Creating new debugfs entries with the queue frozen has the risk of
+	 * deadlock.
+	 */
+	WARN_ON_ONCE(q->mq_freeze_depth != 0);
+	/*
+	 * debugfs_mutex should not be nested under other locks that can be
+	 * grabbed while the queue is frozen.
+	 */
+	lockdep_assert_not_held(&q->elevator_lock);
+	lockdep_assert_not_held(&q->rq_qos_mutex);
+	lockdep_assert_not_held(&q->blkcg_mutex);
+
 	if (IS_ERR_OR_NULL(parent))
 		return;
 
@@ -624,7 +638,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i;
 
-	debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
+	debugfs_create_files(q, q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (!hctx->debugfs_dir)
@@ -650,7 +664,8 @@ static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
 	snprintf(name, sizeof(name), "cpu%u", ctx->cpu);
 	ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir);
 
-	debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs);
+	debugfs_create_files(hctx->queue, ctx_dir, ctx,
+			     blk_mq_debugfs_ctx_attrs);
 }
 
 void blk_mq_debugfs_register_hctx(struct request_queue *q,
@@ -666,7 +681,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
 	snprintf(name, sizeof(name), "hctx%u", hctx->queue_num);
 	hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir);
 
-	debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs);
+	debugfs_create_files(q, hctx->debugfs_dir, hctx,
+			     blk_mq_debugfs_hctx_attrs);
 
 	hctx_for_each_ctx(hctx, ctx, i)
 		blk_mq_debugfs_register_ctx(hctx, ctx);
@@ -717,7 +733,7 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
 
 	q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir);
 
-	debugfs_create_files(q->sched_debugfs_dir, q, e->queue_debugfs_attrs);
+	debugfs_create_files(q, q->sched_debugfs_dir, q, e->queue_debugfs_attrs);
 }
 
 void blk_mq_debugfs_unregister_sched(struct request_queue *q)
@@ -766,7 +782,8 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 							 q->debugfs_dir);
 
 	rqos->debugfs_dir = debugfs_create_dir(dir_name, q->rqos_debugfs_dir);
-	debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
+	debugfs_create_files(q, rqos->debugfs_dir, rqos,
+			     rqos->ops->debugfs_attrs);
 }
 
 void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
@@ -789,7 +806,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
 
 	hctx->sched_debugfs_dir = debugfs_create_dir("sched",
 						     hctx->debugfs_dir);
-	debugfs_create_files(hctx->sched_debugfs_dir, hctx,
+	debugfs_create_files(q, hctx->sched_debugfs_dir, hctx,
 			     e->hctx_debugfs_attrs);
 }
 
-- 
2.39.2

Re: [PATCH 1/4] blk-mq-debugfs: warn about possible deadlock

Posted by Ming Lei 3 months, 4 weeks ago

On Tue, Oct 14, 2025 at 10:21:46AM +0800, Yu Kuai wrote:
> Creating new debugfs entries can trigger fs reclaim, hence we can't do
> this with queue freezed, meanwhile, other locks that can be held while
> queue is freezed should not be held as well.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  block/blk-mq-debugfs.c | 31 ++++++++++++++++++++++++-------
>  1 file changed, 24 insertions(+), 7 deletions(-)
> 
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 4896525b1c05..66864ed0b77f 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -608,9 +608,23 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
>  	{},
>  };
>  
> -static void debugfs_create_files(struct dentry *parent, void *data,
> +static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
> +				 void *data,
>  				 const struct blk_mq_debugfs_attr *attr)
>  {
> +	/*
> +	 * Creating new debugfs entries with queue freezed has the rist of
> +	 * deadlock.
> +	 */
> +	WARN_ON_ONCE(q->mq_freeze_depth != 0);
> +	/*
> +	 * debugfs_mutex should not be nested under other locks that can be
> +	 * grabbed while queue is freezed.
> +	 */
> +	lockdep_assert_not_held(&q->elevator_lock);
> +	lockdep_assert_not_held(&q->rq_qos_mutex);

The use of ->rq_qos_mutex looks like a real mess: in blk-cgroup.c it is
grabbed after the queue is frozen, but inside block/blk-rq-qos.c the two are
reordered. Maybe we need to fix the order between queue freeze and
q->rq_qos_mutex first? Or move on by removing the above line?

Otherwise, this patch looks good.


Thanks,
Ming

Re: [PATCH 1/4] blk-mq-debugfs: warn about possible deadlock

Posted by Yu Kuai 3 months, 4 weeks ago

Hi,

在 2025/10/14 16:06, Ming Lei 写道:
> On Tue, Oct 14, 2025 at 10:21:46AM +0800, Yu Kuai wrote:
>> Creating new debugfs entries can trigger fs reclaim, hence we can't do
>> this with queue freezed, meanwhile, other locks that can be held while
>> queue is freezed should not be held as well.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/blk-mq-debugfs.c | 31 ++++++++++++++++++++++++-------
>>   1 file changed, 24 insertions(+), 7 deletions(-)
>>
>> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
>> index 4896525b1c05..66864ed0b77f 100644
>> --- a/block/blk-mq-debugfs.c
>> +++ b/block/blk-mq-debugfs.c
>> @@ -608,9 +608,23 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
>>   	{},
>>   };
>>   
>> -static void debugfs_create_files(struct dentry *parent, void *data,
>> +static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
>> +				 void *data,
>>   				 const struct blk_mq_debugfs_attr *attr)
>>   {
>> +	/*
>> +	 * Creating new debugfs entries with queue freezed has the rist of
>> +	 * deadlock.
>> +	 */
>> +	WARN_ON_ONCE(q->mq_freeze_depth != 0);
>> +	/*
>> +	 * debugfs_mutex should not be nested under other locks that can be
>> +	 * grabbed while queue is freezed.
>> +	 */
>> +	lockdep_assert_not_held(&q->elevator_lock);
>> +	lockdep_assert_not_held(&q->rq_qos_mutex);
> 
> ->rq_qos_mutex use looks one real mess, in blk-cgroup.c, it is grabbed after
> queue is frozen. However, inside block/blk-rq-qos.c, the two are re-ordered,
> maybe we need to fix order between queue freeze and q->rq_qos_mutex first?
> Or move on by removing the above line?

Yeah, I see this reordering as well, and I tried to fix it in the other
thread for blkg configuration.

- the queue is frozen by the new helper blkg_conf_start() and unfrozen after
   blkg_conf_end(); rq_qos_add() is now called between them.

And for wbt, there are two cases:
  - for blk-sysfs, the queue is already frozen before rq_qos_add() as well;
  - for wbt_enable_default(), this still looks problematic; we should fix
    the reordering separately.

Perhaps I should fix this simple problem first, and then rebase the
thread to convert queue_lock to blkcg_mutex?

Thanks,
Kuai


Thanks,
Kuai
> 
> Otherwise, this patch looks good.
> 
> 
> Thanks,
> Ming
> 
> 
> .
>

Re: [PATCH 1/4] blk-mq-debugfs: warn about possible deadlock

Posted by Ming Lei 3 months, 4 weeks ago

On Tue, Oct 14, 2025 at 04:21:30PM +0800, Yu Kuai wrote:
> Hi,
> 
> 在 2025/10/14 16:06, Ming Lei 写道:
> > On Tue, Oct 14, 2025 at 10:21:46AM +0800, Yu Kuai wrote:
> > > Creating new debugfs entries can trigger fs reclaim, hence we can't do
> > > this with queue freezed, meanwhile, other locks that can be held while
> > > queue is freezed should not be held as well.
> > > 
> > > Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> > > ---
> > >   block/blk-mq-debugfs.c | 31 ++++++++++++++++++++++++-------
> > >   1 file changed, 24 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> > > index 4896525b1c05..66864ed0b77f 100644
> > > --- a/block/blk-mq-debugfs.c
> > > +++ b/block/blk-mq-debugfs.c
> > > @@ -608,9 +608,23 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
> > >   	{},
> > >   };
> > > -static void debugfs_create_files(struct dentry *parent, void *data,
> > > +static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
> > > +				 void *data,
> > >   				 const struct blk_mq_debugfs_attr *attr)
> > >   {
> > > +	/*
> > > +	 * Creating new debugfs entries with queue freezed has the rist of
> > > +	 * deadlock.
> > > +	 */
> > > +	WARN_ON_ONCE(q->mq_freeze_depth != 0);
> > > +	/*
> > > +	 * debugfs_mutex should not be nested under other locks that can be
> > > +	 * grabbed while queue is freezed.
> > > +	 */
> > > +	lockdep_assert_not_held(&q->elevator_lock);
> > > +	lockdep_assert_not_held(&q->rq_qos_mutex);
> > 
> > ->rq_qos_mutex use looks one real mess, in blk-cgroup.c, it is grabbed after
> > queue is frozen. However, inside block/blk-rq-qos.c, the two are re-ordered,
> > maybe we need to fix order between queue freeze and q->rq_qos_mutex first?
> > Or move on by removing the above line?
> 
> Yeah, I see this reoder as well, and I tried to fix this in the other
> thread for blkg configuration.
> 
> - queue is freezed by new helper blkg_conf_start(), and unfreezed after
>   blkg_conf_end(), rq_qos_add() is now called between them.
> 
> And for wbt, there are two cases:
>  - for blk-sysfs, queue is alredy freezed before rq_qos_add() as well;
>  - for wbt_enable_default(), this looks still problemaic, we should fix
>    the reorder seperatly.
> 
> Perhaps, should I fix this simple problem first, and then rebase the
> thread to convert queue_lock to blkcg_mtuex?

As I mentioned, if you want to move on with the patchset first, the line
`lockdep_assert_not_held(&q->rq_qos_mutex);` shouldn't be added until the
->rq_qos_mutex vs. queue freeze order is finalized.


Thanks,
Ming