[PATCH 2/6] block, bfq: don't grab queue_lock from io path

Yu Kuai posted 6 patches 2 months, 2 weeks ago
There is a newer version of this series
[PATCH 2/6] block, bfq: don't grab queue_lock from io path
Posted by Yu Kuai 2 months, 2 weeks ago
From: Yu Kuai <yukuai3@huawei.com>

Currently, issuing io can grab queue_lock three times, from bfq_bio_merge(),
bfq_limit_depth() and bfq_prepare_request(). The queue_lock is not
necessary if the icq is already created:

- q_usage_counter is already grabbed, so the queue won't exit;
- the current thread won't exit;
- if another thread is allocating and inserting a new icq into
  ioc->icq_tree, rcu can be used to protect the icq lookup in the radix
  tree; it's safe to use the returned icq until the queue or the current
  thread exits;

If the ioc or icq is not created yet, then bfq_prepare_request() will
create it, which means the task is issuing io to the queue for the first
time. This can be considered a slow path, and queue_lock will still be
held to protect inserting the allocated icq into ioc->icq_tree.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/bfq-iosched.c | 24 +++++++-----------------
 block/blk-ioc.c     | 43 ++++++++++++++++++++++++++++++++++++++-----
 block/blk.h         |  2 +-
 3 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 0cb1e9873aab..58d57c482acd 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -454,17 +454,13 @@ static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
  */
 static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
 {
-	struct bfq_io_cq *icq;
-	unsigned long flags;
-
-	if (!current->io_context)
-		return NULL;
+	struct io_cq *icq;
 
-	spin_lock_irqsave(&q->queue_lock, flags);
-	icq = icq_to_bic(ioc_lookup_icq(q));
-	spin_unlock_irqrestore(&q->queue_lock, flags);
+	rcu_read_lock();
+	icq = ioc_lookup_icq_rcu(q);
+	rcu_read_unlock();
 
-	return icq;
+	return icq_to_bic(icq);
 }
 
 /*
@@ -2456,16 +2452,10 @@ static void bfq_remove_request(struct request_queue *q,
 static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
 		unsigned int nr_segs)
 {
+	/* bic will not be freed until current or elevator exit */
+	struct bfq_io_cq *bic = bfq_bic_lookup(q);
 	struct bfq_data *bfqd = q->elevator->elevator_data;
 	struct request *free = NULL;
-	/*
-	 * bfq_bic_lookup grabs the queue_lock: invoke it now and
-	 * store its return value for later use, to avoid nesting
-	 * queue_lock inside the bfqd->lock. We assume that the bic
-	 * returned by bfq_bic_lookup does not go away before
-	 * bfqd->lock is taken.
-	 */
-	struct bfq_io_cq *bic = bfq_bic_lookup(q);
 	bool ret;
 
 	spin_lock_irq(&bfqd->lock);
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index ce82770c72ab..0be097a37e22 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -314,7 +314,7 @@ int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
  * Look up io_cq associated with @ioc - @q pair from @ioc.  Must be called
  * with @q->queue_lock held.
  */
-struct io_cq *ioc_lookup_icq(struct request_queue *q)
+static struct io_cq *ioc_lookup_icq(struct request_queue *q)
 {
 	struct io_context *ioc = current->io_context;
 	struct io_cq *icq;
@@ -341,7 +341,40 @@ struct io_cq *ioc_lookup_icq(struct request_queue *q)
 	rcu_read_unlock();
 	return icq;
 }
-EXPORT_SYMBOL(ioc_lookup_icq);
+
+/**
+ * ioc_lookup_icq_rcu - lookup io_cq from ioc in io path
+ * @q: the associated request_queue
+ *
+ * Look up io_cq associated with @ioc - @q pair from @ioc.  Must be called
+ * from io path, either return NULL if current issue io to @q for the first
+ * time, or return a valid icq.
+ */
+struct io_cq *ioc_lookup_icq_rcu(struct request_queue *q)
+{
+	struct io_context *ioc = current->io_context;
+	struct io_cq *icq;
+
+	WARN_ON_ONCE(percpu_ref_is_zero(&q->q_usage_counter));
+
+	if (!ioc)
+		return NULL;
+
+	icq = rcu_dereference(ioc->icq_hint);
+	if (icq && icq->q == q)
+		return icq;
+
+	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
+	if (!icq)
+		return NULL;
+
+	if (WARN_ON_ONCE(icq->q != q))
+		return NULL;
+
+	rcu_assign_pointer(ioc->icq_hint, icq);
+	return icq;
+}
+EXPORT_SYMBOL(ioc_lookup_icq_rcu);
 
 /**
  * ioc_create_icq - create and link io_cq
@@ -420,9 +453,9 @@ struct io_cq *ioc_find_get_icq(struct request_queue *q)
 	} else {
 		get_io_context(ioc);
 
-		spin_lock_irq(&q->queue_lock);
-		icq = ioc_lookup_icq(q);
-		spin_unlock_irq(&q->queue_lock);
+		rcu_read_lock();
+		icq = ioc_lookup_icq_rcu(q);
+		rcu_read_unlock();
 	}
 
 	if (!icq) {
diff --git a/block/blk.h b/block/blk.h
index 468aa83c5a22..3c078e517d59 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -460,7 +460,7 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
  * Internal io_context interface
  */
 struct io_cq *ioc_find_get_icq(struct request_queue *q);
-struct io_cq *ioc_lookup_icq(struct request_queue *q);
+struct io_cq *ioc_lookup_icq_rcu(struct request_queue *q);
 #ifdef CONFIG_BLK_ICQ
 void ioc_clear_queue(struct request_queue *q);
 #else
-- 
2.39.2
Re: [PATCH 2/6] block, bfq: don't grab queue_lock from io path
Posted by Damien Le Moal 2 months, 2 weeks ago
On 7/22/25 4:24 PM, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Currently issue io can grab queue_lock three times from bfq_bio_merge(),
> bfq_limit_depth() and bfq_prepare_request(), the queue_lock is not
> necessary if icq is already created:
> 
> - queue_usage_counter is already grabbed and queue won't exist;
> - current thread won't exist;
> - if other thread is allocating and inserting new icq to ioc->icq_tree,
>   rcu can be used to protect lookup icq from the raidx tree, it's safe
>   to use extracted icq until queue or current thread exit;
> 
> If ioc or icq is not created, then bfq_prepare_request() will create it,
> which means the task is issuing io to queue the first time, this can
> consider a slow path and queue_lock will still be held to protect
> inserting allocated icq to ioc->icq_tree.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>  block/bfq-iosched.c | 24 +++++++-----------------
>  block/blk-ioc.c     | 43 ++++++++++++++++++++++++++++++++++++++-----
>  block/blk.h         |  2 +-
>  3 files changed, 46 insertions(+), 23 deletions(-)
> 
> diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
> index 0cb1e9873aab..58d57c482acd 100644
> --- a/block/bfq-iosched.c
> +++ b/block/bfq-iosched.c
> @@ -454,17 +454,13 @@ static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
>   */
>  static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
>  {
> -	struct bfq_io_cq *icq;
> -	unsigned long flags;
> -
> -	if (!current->io_context)
> -		return NULL;
> +	struct io_cq *icq;
>  
> -	spin_lock_irqsave(&q->queue_lock, flags);
> -	icq = icq_to_bic(ioc_lookup_icq(q));
> -	spin_unlock_irqrestore(&q->queue_lock, flags);
> +	rcu_read_lock();
> +	icq = ioc_lookup_icq_rcu(q);
> +	rcu_read_unlock();
>  
> -	return icq;
> +	return icq_to_bic(icq);

icq cannot be NULL here ? If it can, that needs checking, otherwise,
icq_to_bic() will return a bad address.

>  }
>  
>  /*
> @@ -2456,16 +2452,10 @@ static void bfq_remove_request(struct request_queue *q,
>  static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
>  		unsigned int nr_segs)
>  {
> +	/* bic will not be freed until current or elevator exit */

I would drop this comment, or move it somewhere else as having a comment in the
declarations seems odd.

> +	struct bfq_io_cq *bic = bfq_bic_lookup(q);
>  	struct bfq_data *bfqd = q->elevator->elevator_data;
>  	struct request *free = NULL;
> -	/*
> -	 * bfq_bic_lookup grabs the queue_lock: invoke it now and
> -	 * store its return value for later use, to avoid nesting
> -	 * queue_lock inside the bfqd->lock. We assume that the bic
> -	 * returned by bfq_bic_lookup does not go away before
> -	 * bfqd->lock is taken.
> -	 */
> -	struct bfq_io_cq *bic = bfq_bic_lookup(q);
>  	bool ret;
>  
>  	spin_lock_irq(&bfqd->lock);
> diff --git a/block/blk-ioc.c b/block/blk-ioc.c
> index ce82770c72ab..0be097a37e22 100644
> --- a/block/blk-ioc.c
> +++ b/block/blk-ioc.c
> @@ -314,7 +314,7 @@ int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
>   * Look up io_cq associated with @ioc - @q pair from @ioc.  Must be called
>   * with @q->queue_lock held.
>   */
> -struct io_cq *ioc_lookup_icq(struct request_queue *q)
> +static struct io_cq *ioc_lookup_icq(struct request_queue *q)
>  {
>  	struct io_context *ioc = current->io_context;
>  	struct io_cq *icq;
> @@ -341,7 +341,40 @@ struct io_cq *ioc_lookup_icq(struct request_queue *q)
>  	rcu_read_unlock();
>  	return icq;
>  }
> -EXPORT_SYMBOL(ioc_lookup_icq);
> +
> +/**
> + * ioc_lookup_icq_rcu - lookup io_cq from ioc in io path
> + * @q: the associated request_queue
> + *
> + * Look up io_cq associated with @ioc - @q pair from @ioc.  Must be called
> + * from io path, either return NULL if current issue io to @q for the first
> + * time, or return a valid icq.
> + */
> +struct io_cq *ioc_lookup_icq_rcu(struct request_queue *q)
> +{
> +	struct io_context *ioc = current->io_context;
> +	struct io_cq *icq;
> +
> +	WARN_ON_ONCE(percpu_ref_is_zero(&q->q_usage_counter));
> +
> +	if (!ioc)
> +		return NULL;
> +
> +	icq = rcu_dereference(ioc->icq_hint);
> +	if (icq && icq->q == q)
> +		return icq;
> +
> +	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
> +	if (!icq)
> +		return NULL;
> +
> +	if (WARN_ON_ONCE(icq->q != q))
> +		return NULL;
> +
> +	rcu_assign_pointer(ioc->icq_hint, icq);
> +	return icq;
> +}
> +EXPORT_SYMBOL(ioc_lookup_icq_rcu);
>  
>  /**
>   * ioc_create_icq - create and link io_cq
> @@ -420,9 +453,9 @@ struct io_cq *ioc_find_get_icq(struct request_queue *q)
>  	} else {
>  		get_io_context(ioc);
>  
> -		spin_lock_irq(&q->queue_lock);
> -		icq = ioc_lookup_icq(q);
> -		spin_unlock_irq(&q->queue_lock);
> +		rcu_read_lock();
> +		icq = ioc_lookup_icq_rcu(q);
> +		rcu_read_unlock();
>  	}
>  
>  	if (!icq) {
> diff --git a/block/blk.h b/block/blk.h
> index 468aa83c5a22..3c078e517d59 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -460,7 +460,7 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
>   * Internal io_context interface
>   */
>  struct io_cq *ioc_find_get_icq(struct request_queue *q);
> -struct io_cq *ioc_lookup_icq(struct request_queue *q);
> +struct io_cq *ioc_lookup_icq_rcu(struct request_queue *q);
>  #ifdef CONFIG_BLK_ICQ
>  void ioc_clear_queue(struct request_queue *q);
>  #else

The blk-ioc changes should go into their own patch, to separate block layer
changes and bfq scheduler changes. No ?


-- 
Damien Le Moal
Western Digital Research
Re: [PATCH 2/6] block, bfq: don't grab queue_lock from io path
Posted by Yu Kuai 2 months, 2 weeks ago
Hi,

在 2025/07/23 9:52, Damien Le Moal 写道:
> On 7/22/25 4:24 PM, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Currently issue io can grab queue_lock three times from bfq_bio_merge(),
>> bfq_limit_depth() and bfq_prepare_request(), the queue_lock is not
>> necessary if icq is already created:
>>
>> - queue_usage_counter is already grabbed and queue won't exist;
>> - current thread won't exist;
>> - if other thread is allocating and inserting new icq to ioc->icq_tree,
>>    rcu can be used to protect lookup icq from the raidx tree, it's safe
>>    to use extracted icq until queue or current thread exit;
>>
>> If ioc or icq is not created, then bfq_prepare_request() will create it,
>> which means the task is issuing io to queue the first time, this can
>> consider a slow path and queue_lock will still be held to protect
>> inserting allocated icq to ioc->icq_tree.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/bfq-iosched.c | 24 +++++++-----------------
>>   block/blk-ioc.c     | 43 ++++++++++++++++++++++++++++++++++++++-----
>>   block/blk.h         |  2 +-
>>   3 files changed, 46 insertions(+), 23 deletions(-)
>>
>> diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
>> index 0cb1e9873aab..58d57c482acd 100644
>> --- a/block/bfq-iosched.c
>> +++ b/block/bfq-iosched.c
>> @@ -454,17 +454,13 @@ static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
>>    */
>>   static struct bfq_io_cq *bfq_bic_lookup(struct request_queue *q)
>>   {
>> -	struct bfq_io_cq *icq;
>> -	unsigned long flags;
>> -
>> -	if (!current->io_context)
>> -		return NULL;
>> +	struct io_cq *icq;
>>   
>> -	spin_lock_irqsave(&q->queue_lock, flags);
>> -	icq = icq_to_bic(ioc_lookup_icq(q));
>> -	spin_unlock_irqrestore(&q->queue_lock, flags);
>> +	rcu_read_lock();
>> +	icq = ioc_lookup_icq_rcu(q);
>> +	rcu_read_unlock();
>>   
>> -	return icq;
>> +	return icq_to_bic(icq);
> 
> icq cannot be NULL here ? If it can, that needs checking, otherwise,
> icq_to_bic() will return a bad address.

See the comments in icq_to_bic, this is fine.

static struct bfq_io_cq *icq_to_bic(struct io_cq *icq)
{
         /* bic->icq is the first member, %NULL will convert to %NULL */
         return container_of(icq, struct bfq_io_cq, icq);
}

> 
>>   }
>>   
>>   /*
>> @@ -2456,16 +2452,10 @@ static void bfq_remove_request(struct request_queue *q,
>>   static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
>>   		unsigned int nr_segs)
>>   {
>> +	/* bic will not be freed until current or elevator exit */
> 
> I would drop this comment, or move it somewhere else as having a comment in the
> declarations seems odd.

Ok, I'll drop the comment.
> 
>> +	struct bfq_io_cq *bic = bfq_bic_lookup(q);
>>   	struct bfq_data *bfqd = q->elevator->elevator_data;
>>   	struct request *free = NULL;
>> -	/*
>> -	 * bfq_bic_lookup grabs the queue_lock: invoke it now and
>> -	 * store its return value for later use, to avoid nesting
>> -	 * queue_lock inside the bfqd->lock. We assume that the bic
>> -	 * returned by bfq_bic_lookup does not go away before
>> -	 * bfqd->lock is taken.
>> -	 */
>> -	struct bfq_io_cq *bic = bfq_bic_lookup(q);
>>   	bool ret;
>>   
>>   	spin_lock_irq(&bfqd->lock);
>> diff --git a/block/blk-ioc.c b/block/blk-ioc.c
>> index ce82770c72ab..0be097a37e22 100644
>> --- a/block/blk-ioc.c
>> +++ b/block/blk-ioc.c
>> @@ -314,7 +314,7 @@ int __copy_io(unsigned long clone_flags, struct task_struct *tsk)
>>    * Look up io_cq associated with @ioc - @q pair from @ioc.  Must be called
>>    * with @q->queue_lock held.
>>    */
>> -struct io_cq *ioc_lookup_icq(struct request_queue *q)
>> +static struct io_cq *ioc_lookup_icq(struct request_queue *q)
>>   {
>>   	struct io_context *ioc = current->io_context;
>>   	struct io_cq *icq;
>> @@ -341,7 +341,40 @@ struct io_cq *ioc_lookup_icq(struct request_queue *q)
>>   	rcu_read_unlock();
>>   	return icq;
>>   }
>> -EXPORT_SYMBOL(ioc_lookup_icq);
>> +
>> +/**
>> + * ioc_lookup_icq_rcu - lookup io_cq from ioc in io path
>> + * @q: the associated request_queue
>> + *
>> + * Look up io_cq associated with @ioc - @q pair from @ioc.  Must be called
>> + * from io path, either return NULL if current issue io to @q for the first
>> + * time, or return a valid icq.
>> + */
>> +struct io_cq *ioc_lookup_icq_rcu(struct request_queue *q)
>> +{
>> +	struct io_context *ioc = current->io_context;
>> +	struct io_cq *icq;
>> +
>> +	WARN_ON_ONCE(percpu_ref_is_zero(&q->q_usage_counter));
>> +
>> +	if (!ioc)
>> +		return NULL;
>> +
>> +	icq = rcu_dereference(ioc->icq_hint);
>> +	if (icq && icq->q == q)
>> +		return icq;
>> +
>> +	icq = radix_tree_lookup(&ioc->icq_tree, q->id);
>> +	if (!icq)
>> +		return NULL;
>> +
>> +	if (WARN_ON_ONCE(icq->q != q))
>> +		return NULL;
>> +
>> +	rcu_assign_pointer(ioc->icq_hint, icq);
>> +	return icq;
>> +}
>> +EXPORT_SYMBOL(ioc_lookup_icq_rcu);
>>   
>>   /**
>>    * ioc_create_icq - create and link io_cq
>> @@ -420,9 +453,9 @@ struct io_cq *ioc_find_get_icq(struct request_queue *q)
>>   	} else {
>>   		get_io_context(ioc);
>>   
>> -		spin_lock_irq(&q->queue_lock);
>> -		icq = ioc_lookup_icq(q);
>> -		spin_unlock_irq(&q->queue_lock);
>> +		rcu_read_lock();
>> +		icq = ioc_lookup_icq_rcu(q);
>> +		rcu_read_unlock();
>>   	}
>>   
>>   	if (!icq) {
>> diff --git a/block/blk.h b/block/blk.h
>> index 468aa83c5a22..3c078e517d59 100644
>> --- a/block/blk.h
>> +++ b/block/blk.h
>> @@ -460,7 +460,7 @@ static inline void req_set_nomerge(struct request_queue *q, struct request *req)
>>    * Internal io_context interface
>>    */
>>   struct io_cq *ioc_find_get_icq(struct request_queue *q);
>> -struct io_cq *ioc_lookup_icq(struct request_queue *q);
>> +struct io_cq *ioc_lookup_icq_rcu(struct request_queue *q);
>>   #ifdef CONFIG_BLK_ICQ
>>   void ioc_clear_queue(struct request_queue *q);
>>   #else
> 
> The blk-ioc changes should go into there own patch, to separate block layer
> changes and bfq scheduler changes. No ?

Actually bfq is the only user of blk-ioc. In order to separate the changes,
should I do the following?

patch 1, add helper ioc_lookup_icq_rcu
patch 2, convert bfq to use this helper
patch 3, cleanup the old helper

If so, I'll move the above changes to the front of this set.

Thanks,
Kuai
> 
>