block/blk-iocost.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-)
ioc_timer_fn() acquires ioc->lock from timer softirq context. The
io.weight, io.cost.qos and io.cost.model cgroup handlers can take the
same lock from process context, and the direct handler paths must not do
so with interrupts enabled.
A blkcg policy configuration reproducer with lockdep reproduced the
following report:
WARNING: inconsistent lock state
7.1.0-rc2-g1e14adca0199 #1 Not tainted
--------------------------------
inconsistent {SOFTIRQ-ON-W} -> {IN-SOFTIRQ-W} usage.
swapper/2/0 [HC0[0]:SC1[1]:HE0:SE0] takes:
ffff88810f95d0f8 (&ioc->lock){+.?.}-{3:3}, at: ioc_timer_fn+0x3ff/0x3af0
{SOFTIRQ-ON-W} state was registered at:
lock_acquire+0xd4/0x290
_raw_spin_lock+0x3a/0x70
ioc_weight_write+0x35a/0x420
cgroup_file_write+0x1c5/0x4b0
kernfs_fop_write_iter+0x1d7/0x280
vfs_write+0x580/0x630
ksys_write+0xec/0x190
do_syscall_64+0x156/0x490
entry_SYSCALL_64_after_hwframe+0x77/0x7f
Possible unsafe locking scenario:
CPU0
----
lock(&ioc->lock);
<Interrupt>
lock(&ioc->lock);
*** DEADLOCK ***
1 lock held by swapper/2/0:
#0: ffffc90000230d20 ((&ioc->timer)){+.-.}-{0:0}, at:
call_timer_fn+0xba/0x3a0
stack backtrace:
CPU: 2 UID: 0 PID: 0 Comm: swapper/2 Not tainted 7.1.0-rc2-g1e14adca0199 #1 PREEMPT ea13f83d4b74a12510d20db4a7d9a0fe8275f05c
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-5.fc42 04/01/2014
Call Trace:
<IRQ>
dump_stack_lvl+0x54/0x70
print_usage_bug+0x26d/0x280
mark_lock_irq+0x3ef/0x400
mark_lock+0x117/0x190
__lock_acquire+0x592/0x2850
lock_acquire+0xd4/0x290
_raw_spin_lock_irq+0x49/0x80
ioc_timer_fn+0x3ff/0x3af0
call_timer_fn+0x120/0x3a0
__run_timer_base+0x3ad/0x490
run_timer_softirq+0x31/0x60
handle_softirqs+0x1a0/0x550
__irq_exit_rcu+0x8c/0x150
irq_exit_rcu+0xe/0x20
sysvec_apic_timer_interrupt+0x6e/0x80
</IRQ>
Use spin_lock_irq() in the affected process-context handlers. The default
io.weight update already holds blkcg->lock with spin_lock_irq(), so the
nested ioc->lock acquisition there is already IRQ-safe and is left as a
plain spin_lock().
Fixes: 7caa47151ab2 ("blkcg: implement blk-iocost")
Signed-off-by: Yu Kuai <yukuai@fygo.io>
---
block/blk-iocost.c | 12 ++++++------
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/block/blk-iocost.c b/block/blk-iocost.c
index 0cca88a366dc..493cba5523a8 100644
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -3155,11 +3155,11 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf,
goto einval;
}
- spin_lock(&iocg->ioc->lock);
+ spin_lock_irq(&iocg->ioc->lock);
iocg->cfg_weight = v * WEIGHT_ONE;
ioc_now(iocg->ioc, &now);
weight_updated(iocg, &now);
- spin_unlock(&iocg->ioc->lock);
+ spin_unlock_irq(&iocg->ioc->lock);
blkg_conf_exit(&ctx);
return nbytes;
@@ -3180,7 +3180,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
if (!dname)
return 0;
- spin_lock(&ioc->lock);
+ spin_lock_irq(&ioc->lock);
seq_printf(sf, "%s enable=%d ctrl=%s rpct=%u.%02u rlat=%u wpct=%u.%02u wlat=%u min=%u.%02u max=%u.%02u\n",
dname, ioc->enabled, ioc->user_qos_params ? "user" : "auto",
ioc->params.qos[QOS_RPPM] / 10000,
@@ -3193,7 +3193,7 @@ static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd,
ioc->params.qos[QOS_MIN] % 10000 / 100,
ioc->params.qos[QOS_MAX] / 10000,
ioc->params.qos[QOS_MAX] % 10000 / 100);
- spin_unlock(&ioc->lock);
+ spin_unlock_irq(&ioc->lock);
return 0;
}
@@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf,
if (!dname)
return 0;
- spin_lock(&ioc->lock);
+ spin_lock_irq(&ioc->lock);
seq_printf(sf, "%s ctrl=%s model=linear "
"rbps=%llu rseqiops=%llu rrandiops=%llu "
"wbps=%llu wseqiops=%llu wrandiops=%llu\n",
dname, ioc->user_cost_model ? "user" : "auto",
u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS],
u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]);
- spin_unlock(&ioc->lock);
+ spin_unlock_irq(&ioc->lock);
return 0;
}
--
2.51.0
On 5/31/26 11:13 PM, Yu Kuai wrote: > @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf, > if (!dname) > return 0; > > - spin_lock(&ioc->lock); > + spin_lock_irq(&ioc->lock); > seq_printf(sf, "%s ctrl=%s model=linear " > "rbps=%llu rseqiops=%llu rrandiops=%llu " > "wbps=%llu wseqiops=%llu wrandiops=%llu\n", > dname, ioc->user_cost_model ? "user" : "auto", > u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], > u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]); > - spin_unlock(&ioc->lock); > + spin_unlock_irq(&ioc->lock); > return 0; > } This change is wrong. ioc_cost_model_prfill() only has one caller, namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of the above function enables interrupts while q->queue_lock is held. If an interrupt happens on the same CPU core before q->queue_lock is unlocked, and that interrupt tries to lock q->queue_lock, a deadlock will occur. Bart.
On 6/1/26 3:50 PM, Bart Van Assche wrote: > On 5/31/26 11:13 PM, Yu Kuai wrote: >> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf, >> if (!dname) >> return 0; >> - spin_lock(&ioc->lock); >> + spin_lock_irq(&ioc->lock); >> seq_printf(sf, "%s ctrl=%s model=linear " >> "rbps=%llu rseqiops=%llu rrandiops=%llu " >> "wbps=%llu wseqiops=%llu wrandiops=%llu\n", >> dname, ioc->user_cost_model ? "user" : "auto", >> u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], >> u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]); >> - spin_unlock(&ioc->lock); >> + spin_unlock_irq(&ioc->lock); >> return 0; >> } > > This change is wrong. ioc_cost_model_prfill() only has one caller, > namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function > with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of > the above function enables interrupts while q->queue_lock is held. If an > interrupt happens on the same CPU core before q->queue_lock is unlocked, > and that interrupt tries to lock q->queue_lock, a deadlock will occur. Agree, it's broken. Which makes me suspect of the traces shown. Yu, can you please shed some light on this? I've dropped it, thanks Bart. -- Jens Axboe
Hi, 在 2026/6/2 21:25, Jens Axboe 写道: > On 6/1/26 3:50 PM, Bart Van Assche wrote: >> On 5/31/26 11:13 PM, Yu Kuai wrote: >>> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf, >>> if (!dname) >>> return 0; >>> - spin_lock(&ioc->lock); >>> + spin_lock_irq(&ioc->lock); >>> seq_printf(sf, "%s ctrl=%s model=linear " >>> "rbps=%llu rseqiops=%llu rrandiops=%llu " >>> "wbps=%llu wseqiops=%llu wrandiops=%llu\n", >>> dname, ioc->user_cost_model ? "user" : "auto", >>> u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], >>> u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]); >>> - spin_unlock(&ioc->lock); >>> + spin_unlock_irq(&ioc->lock); >>> return 0; >>> } >> This change is wrong. ioc_cost_model_prfill() only has one caller, >> namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function >> with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of >> the above function enables interrupts while q->queue_lock is held. If an >> interrupt happens on the same CPU core before q->queue_lock is unlocked, >> and that interrupt tries to lock q->queue_lock, a deadlock will occur. > Agree, it's broken. Which makes me suspect of the traces shown. Yu, > can you please shed some light on this? Looks like my reply is in your spam again :( The trace is from ioc_weight_write(), which do have the problem. And while reviewing related code, I'm wrong to think ioc_cost_model_prfill() have the same problem and changed it as well. > > I've dropped it, thanks Bart. I'll send a v2, and only fix ioc_weight_write(). > -- Thansk, Kuai
Hi, 在 2026/6/2 23:56, Yu Kuai 写道: > Hi, > > 在 2026/6/2 21:25, Jens Axboe 写道: >> On 6/1/26 3:50 PM, Bart Van Assche wrote: >>> On 5/31/26 11:13 PM, Yu Kuai wrote: >>>> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct seq_file *sf, >>>> if (!dname) >>>> return 0; >>>> - spin_lock(&ioc->lock); >>>> + spin_lock_irq(&ioc->lock); >>>> seq_printf(sf, "%s ctrl=%s model=linear " >>>> "rbps=%llu rseqiops=%llu rrandiops=%llu " >>>> "wbps=%llu wseqiops=%llu wrandiops=%llu\n", >>>> dname, ioc->user_cost_model ? "user" : "auto", >>>> u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], >>>> u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], u[I_LCOEF_WRANDIOPS]); >>>> - spin_unlock(&ioc->lock); >>>> + spin_unlock_irq(&ioc->lock); >>>> return 0; >>>> } >>> This change is wrong. ioc_cost_model_prfill() only has one caller, >>> namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function >>> with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of >>> the above function enables interrupts while q->queue_lock is held. If an >>> interrupt happens on the same CPU core before q->queue_lock is unlocked, >>> and that interrupt tries to lock q->queue_lock, a deadlock will occur. >> Agree, it's broken. Which makes me suspect of the traces shown. Yu, >> can you please shed some light on this? > Looks like my reply is in your spam again :( > > The trace is from ioc_weight_write(), which do have the problem. And > while reviewing related code, I'm wrong to think ioc_cost_model_prfill() > have the same problem and changed it as well. > >> I've dropped it, thanks Bart. > I'll send a v2, and only fix ioc_weight_write(). I just update the latest branch and try this patch, however I didn't reporduce the problem. And turns out, blkg_conf_prep() already disable irq by spin_lock_irq(&q->queue_lock). So there is no problem at all. The trace I found is because there are some pending patches to convert protecting blkcg from queue_lock to blkcg_mutex, and the spin_lock_irq(&q->queue_lock) is removed. Sorry for the noise, I should have checked if this problem was introduced by myself first. And thanks Bart to catch it. > -- Thansk, Kuai
Hi, 在 2026/6/2 5:50, Bart Van Assche 写道: > On 5/31/26 11:13 PM, Yu Kuai wrote: >> @@ -3378,14 +3378,14 @@ static u64 ioc_cost_model_prfill(struct >> seq_file *sf, >> if (!dname) >> return 0; >> - spin_lock(&ioc->lock); >> + spin_lock_irq(&ioc->lock); >> seq_printf(sf, "%s ctrl=%s model=linear " >> "rbps=%llu rseqiops=%llu rrandiops=%llu " >> "wbps=%llu wseqiops=%llu wrandiops=%llu\n", >> dname, ioc->user_cost_model ? "user" : "auto", >> u[I_LCOEF_RBPS], u[I_LCOEF_RSEQIOPS], u[I_LCOEF_RRANDIOPS], >> u[I_LCOEF_WBPS], u[I_LCOEF_WSEQIOPS], >> u[I_LCOEF_WRANDIOPS]); >> - spin_unlock(&ioc->lock); >> + spin_unlock_irq(&ioc->lock); >> return 0; >> } > > This change is wrong. ioc_cost_model_prfill() only has one caller, > namely blkcg_print_blkgs(). blkcg_print_blkgs() calls the above function > with interrupts disabled. The spin_unlock_irq(&ioc->lock) at the end of > the above function enables interrupts while q->queue_lock is held. If an > interrupt happens on the same CPU core before q->queue_lock is unlocked, > and that interrupt tries to lock q->queue_lock, a deadlock will occur. Yes, this is correct. Sorry I just notice this is a sysfs api like other places and didn't notice the problem. So ioc_weight_write() is the only place that need the fix. Jens, please let me not if you want me to send a v2 or a fix. > > Bart. -- Thansk, Kuai
On Mon, 01 Jun 2026 14:13:12 +0800, Yu Kuai wrote:
> ioc_timer_fn() acquires ioc->lock from timer softirq context. The
> io.weight, io.cost.qos and io.cost.model cgroup handlers can take the
> same lock from process context, and the direct handler paths must not do
> so with interrupts enabled.
>
> A blkcg policy configuration reproducer with lockdep reproduced the
> following report:
>
> [...]
Applied, thanks!
[1/1] blk-iocost: use irq-safe locking in cgroup handlers
commit: 1059a743064b873aaaf34ebcff89110c7753fa32
Best regards,
--
Jens Axboe
© 2016 - 2026 Red Hat, Inc.