[PATCH] blk-iocost: use irq-safe locking in cgroup handlers

Yu Kuai posted 1 patch 6 days, 23 hours ago
block/blk-iocost.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
[PATCH] blk-iocost: use irq-safe locking in cgroup handlers
Posted by Yu Kuai 6 days, 23 hours ago
ioc_timer_fn() acquires ioc->lock from timer softirq context. The
io.weight, io.cost.qos and io.cost.model cgroup handlers can take the
same lock from process context, and the direct handler paths must not do
so with interrupts enabled.

A blkcg policy configuration reproducer with lockdep reproduced the
following report:

  WARNING: inconsistent lock state
  7.1.0-rc2-g1e14adca0199 #1 Not tainted
  --------------------------------
  inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
  swapper/2/0 [HC0[0]:SC1[1]:HE0:SE0] takes:
  ffff88810f95d0f8 (&ioc->lock){+.?.}-{3:3}, at: ioc_timer_fn+0x3ff/0x3af0
  {SOFTIRQ-ON-W} state was registered at:
    lock_acquire+0xd4/0x290
    _raw_spin_lock+0x3a/0x70
    ioc_weight_write+0x35a/0x420
    cgroup_file_write+0x1c5/0x4b0
    kernfs_fop_write_iter+0x1d7/0x280
    vfs_write+0x580/0x630
    ksys_write+0xec/0x190
    do_syscall_64+0x156/0x490
    entry_SYSCALL_64_after_hwframe+0x77/0x7f

  Possible unsafe locking scenario:

        CPU0
        ----
    lock(&ioc->lock);
    <Interrupt>
      lock(&ioc->lock);

     *** DEADLOCK ***

  1 lock held by swapper/2/0:
   #0: ffffc90000230d20 ((&ioc->timer)){+.-.}-{0:0}, at:
       call_timer_fn+0xba/0x3a0
  stack backtrace:
  CPU: 2 UID: 0 PID: 0 Comm: swapper/2 Not tainted 7.1.0-rc2-g1e14adca0199 #1 PREEMPT  ea13f83d4b74a12510d20db4a7d9a0fe8275f05c
  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-5.fc42 04/01/2014
  Call Trace:
   <IRQ>
   dump_stack_lvl+0x54/0x70
   print_usage_bug+0x26d/0x280
   mark_lock_irq+0x3ef/0x400
   mark_lock+0x117/0x190
   __lock_acquire+0x592/0x2850
   lock_acquire+0xd4/0x290
   _raw_spin_lock_irq+0x49/0x80
   ioc_timer_fn+0x3ff/0x3af0
   call_timer_fn+0x120/0x3a0
   __run_timer_base+0x3ad/0x490
   run_timer_softirq+0x31/0x60
   handle_softirqs+0x1a0/0x550
   __irq_exit_rcu+0x8c/0x150
   irq_exit_rcu+0xe/0x20
   sysvec_apic_timer_interrupt+0x6e/0x80
   </IRQ>

Use spin_lock_irq() in the affected process-context handlers. The default
io.weight update already holds blkcg->lock with spin_lock_irq(), so the
nested ioc->lock acquisition there is already IRQ-safe and is left as a
plain spin_lock().

Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
 block/blk-iocost.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 0cca88a366dc..493cba5523a8 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3155,11 +3155,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
 			goto einval;
 	}
 
-	spin_lock(&iocg->ioc->lock);
+	spin_lock_irq(&iocg->ioc->lock);
 	iocg->cfg_weight = v * WEIGHT_ONE;
 	ioc_now(iocg->ioc, &now);
 	weight_updated(iocg, &now);
-	spin_unlock(&iocg->ioc->lock);
+	spin_unlock_irq(&iocg->ioc->lock);
 
 	blkg_conf_exit(&ctx);
 	return nbytes;
@@ -3180,7 +3180,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
 	if (!dname)
 		return 0;
 
-	spin_lock(&ioc->lock);
+	spin_lock_irq(&ioc->lock);
 	seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
 		   dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
 		   ioc->params.qos[QOS_RPPM] / 10000,
@@ -3193,7 +3193,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
 		   ioc->params.qos[QOS_MIN] % 10000 / 100,
 		   ioc->params.qos[QOS_MAX] / 10000,
 		   ioc->params.qos[QOS_MAX] % 10000 / 100);
-	spin_unlock(&ioc->lock);
+	spin_unlock_irq(&ioc->lock);
 	return 0;
 }
 
@@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
 	if (!dname)
 		return 0;
 
-	spin_lock(&ioc->lock);
+	spin_lock_irq(&ioc->lock);
 	seq_printf(sf, "%s ctrl=%s model=linear "
 		   "rbps=%llu rseqiops=%llu rrandiops=%llu "
 		   "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
 		   dname, ioc->user_cost_model ? "user" : "auto",
 		   u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
 		   u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
-	spin_unlock(&ioc->lock);
+	spin_unlock_irq(&ioc->lock);
 	return 0;
 }
 
-- 
2.51.0
Re: [PATCH] blk-iocost: use irq-safe locking in cgroup handlers
Posted by Bart Van Assche 6 days, 7 hours ago
On 5/31/26 11:13 PM, Yu Kuai wrote:
> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
>   	if (!dname)
>   		return 0;
>   
> -	spin_lock(&ioc->lock);
> +	spin_lock_irq(&ioc->lock);
>   	seq_printf(sf, "%s ctrl=%s model=linear "
>   		   "rbps=%llu rseqiops=%llu rrandiops=%llu "
>   		   "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
>   		   dname, ioc->user_cost_model ? "user" : "auto",
>   		   u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
>   		   u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
> -	spin_unlock(&ioc->lock);
> +	spin_unlock_irq(&ioc->lock);
>   	return 0;
>   }

This change is wrong. ioc_cost_model_prfill() only has one caller,
namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function
with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of
the above function enables interrupts while q->queue_lock is held. If an
interrupt happens on the same CPU core before q->queue_lock is unlocked,
and that interrupt tries to lock q->queue_lock, a deadlock will occur.

Bart.
Re: [PATCH] blk-iocost: use irq-safe locking in cgroup handlers
Posted by Jens Axboe 5 days, 16 hours ago
On 6/1/26 3:50 PM, Bart Van Assche wrote:
> On 5/31/26 11:13 PM, Yu Kuai wrote:
>> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
>>       if (!dname)
>>           return 0;
>>   -    spin_lock(&ioc->lock);
>> +    spin_lock_irq(&ioc->lock);
>>       seq_printf(sf, "%s ctrl=%s model=linear "
>>              "rbps=%llu rseqiops=%llu rrandiops=%llu "
>>              "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
>>              dname, ioc->user_cost_model ? "user" : "auto",
>>              u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
>>              u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
>> -    spin_unlock(&ioc->lock);
>> +    spin_unlock_irq(&ioc->lock);
>>       return 0;
>>   }
> 
> This change is wrong. ioc_cost_model_prfill() only has one caller,
> namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function
> with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of
> the above function enables interrupts while q->queue_lock is held. If an
> interrupt happens on the same CPU core before q->queue_lock is unlocked,
> and that interrupt tries to lock q->queue_lock, a deadlock will occur.

Agree, it's broken. Which makes me suspect of the traces shown. Yu,
can you please shed some light on this?

I've dropped it, thanks Bart.

-- 
Jens Axboe

Re: [PATCH] blk-iocost: use irq-safe locking in cgroup handlers
Posted by Yu Kuai 5 days, 13 hours ago
Hi,

在 2026/6/2 21:25, Jens Axboe 写道:
> On 6/1/26 3:50 PM, Bart Van Assche wrote:
>> On 5/31/26 11:13 PM, Yu Kuai wrote:
>>> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
>>>        if (!dname)
>>>            return 0;
>>>    -    spin_lock(&ioc->lock);
>>> +    spin_lock_irq(&ioc->lock);
>>>        seq_printf(sf, "%s ctrl=%s model=linear "
>>>               "rbps=%llu rseqiops=%llu rrandiops=%llu "
>>>               "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
>>>               dname, ioc->user_cost_model ? "user" : "auto",
>>>               u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
>>>               u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
>>> -    spin_unlock(&ioc->lock);
>>> +    spin_unlock_irq(&ioc->lock);
>>>        return 0;
>>>    }
>> This change is wrong. ioc_cost_model_prfill() only has one caller,
>> namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function
>> with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of
>> the above function enables interrupts while q->queue_lock is held. If an
>> interrupt happens on the same CPU core before q->queue_lock is unlocked,
>> and that interrupt tries to lock q->queue_lock, a deadlock will occur.
> Agree, it's broken. Which makes me suspect of the traces shown. Yu,
> can you please shed some light on this?

Looks like my reply is in your spam again :(

The trace is from ioc_weight_write(), which do have the problem. And
while reviewing related code, I'm wrong to think ioc_cost_model_prfill()
have the same problem and changed it as well.

>
> I've dropped it, thanks Bart.

I'll send a v2, and only fix ioc_weight_write().

>
-- 
Thansk,
Kuai
Re: [PATCH] blk-iocost: use irq-safe locking in cgroup handlers
Posted by Yu Kuai 5 days, 13 hours ago
Hi,

在 2026/6/2 23:56, Yu Kuai 写道:
> Hi,
>
> 在 2026/6/2 21:25, Jens Axboe 写道:
>> On 6/1/26 3:50 PM, Bart Van Assche wrote:
>>> On 5/31/26 11:13 PM, Yu Kuai wrote:
>>>> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
>>>>         if (!dname)
>>>>             return 0;
>>>>     -    spin_lock(&ioc->lock);
>>>> +    spin_lock_irq(&ioc->lock);
>>>>         seq_printf(sf, "%s ctrl=%s model=linear "
>>>>                "rbps=%llu rseqiops=%llu rrandiops=%llu "
>>>>                "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
>>>>                dname, ioc->user_cost_model ? "user" : "auto",
>>>>                u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
>>>>                u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
>>>> -    spin_unlock(&ioc->lock);
>>>> +    spin_unlock_irq(&ioc->lock);
>>>>         return 0;
>>>>     }
>>> This change is wrong. ioc_cost_model_prfill() only has one caller,
>>> namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function
>>> with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of
>>> the above function enables interrupts while q->queue_lock is held. If an
>>> interrupt happens on the same CPU core before q->queue_lock is unlocked,
>>> and that interrupt tries to lock q->queue_lock, a deadlock will occur.
>> Agree, it's broken. Which makes me suspect of the traces shown. Yu,
>> can you please shed some light on this?
> Looks like my reply is in your spam again :(
>
> The trace is from ioc_weight_write(), which do have the problem. And
> while reviewing related code, I'm wrong to think ioc_cost_model_prfill()
> have the same problem and changed it as well.
>
>> I've dropped it, thanks Bart.
> I'll send a v2, and only fix ioc_weight_write().

I just update the latest branch and try this patch, however I didn't reporduce
the problem. And turns out, blkg_conf_prep() already disable irq by
spin_lock_irq(&q->queue_lock). So there is no problem at all.

The trace I found is because there are some pending patches to convert
protecting blkcg from queue_lock to blkcg_mutex, and the
spin_lock_irq(&q->queue_lock) is removed.

Sorry for the noise, I should have checked if this problem was introduced
by myself first. And thanks Bart to catch it.

>
-- 
Thansk,
Kuai
Re: [PATCH] blk-iocost: use irq-safe locking in cgroup handlers
Posted by Yu Kuai 6 days, 1 hour ago
Hi,

在 2026/6/2 5:50, Bart Van Assche 写道:
> On 5/31/26 11:13 PM, Yu Kuai wrote:
>> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct 
>> seq_file *sf,
>>       if (!dname)
>>           return 0;
>>   -    spin_lock(&ioc->lock);
>> +    spin_lock_irq(&ioc->lock);
>>       seq_printf(sf, "%s ctrl=%s model=linear "
>>              "rbps=%llu rseqiops=%llu rrandiops=%llu "
>>              "wbps=%llu wseqiops=%llu wrandiops=%llu\n",
>>              dname, ioc->user_cost_model ? "user" : "auto",
>>              u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
>>              u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], 
>> u[I_LCOEF_WRANDIOPS]);
>> -    spin_unlock(&ioc->lock);
>> +    spin_unlock_irq(&ioc->lock);
>>       return 0;
>>   }
>
> This change is wrong. ioc_cost_model_prfill() only has one caller,
> namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function
> with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of
> the above function enables interrupts while q->queue_lock is held. If an
> interrupt happens on the same CPU core before q->queue_lock is unlocked,
> and that interrupt tries to lock q->queue_lock, a deadlock will occur.

Yes, this is correct. Sorry I just notice this is a sysfs api like other
places and didn't notice the problem. So ioc_weight_write() is the only
place that need the fix.

Jens, please let me not if you want me to send a v2 or a fix.

>
> Bart.

-- 
Thansk,
Kuai
Re: [PATCH] blk-iocost: use irq-safe locking in cgroup handlers
Posted by Jens Axboe 6 days, 10 hours ago
On Mon, 01 Jun 2026 14:13:12 +0800, Yu Kuai wrote:
> ioc_timer_fn() acquires ioc->lock from timer softirq context. The
> io.weight, io.cost.qos and io.cost.model cgroup handlers can take the
> same lock from process context, and the direct handler paths must not do
> so with interrupts enabled.
> 
> A blkcg policy configuration reproducer with lockdep reproduced the
> following report:
> 
> [...]

Applied, thanks!

[1/1] blk-iocost: use irq-safe locking in cgroup handlers
      commit: 1059a743064b873aaaf34ebcff89110c7753fa32

Best regards,
-- 
Jens Axboe