fs/bcachefs/replicas.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-)
Use mutex_trylock instead of mutex_lock in bch2_replicas_entry_validate to
prevent potential deadlock[1].
[1] https://syzkaller.appspot.com/bug?extid=4d24267b490e2b68a5fa
Reported-by: syzbot+4d24267b490e2b68a5fa@syzkaller.appspotmail.com
Closes: https://syzkaller.appspot.com/bug?extid=4d24267b490e2b68a5fa
Fixes: 49fd90b2cc33 ("bcachefs: Fix unlocked access to c->disk_sb.sb in bch2_replicas_entry_validate()")
Signed-off-by: Piotr Zalewski <pZ010001011111@proton.me>
---
fs/bcachefs/replicas.c | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index bcb3276747e0..d301832c5a1b 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -98,9 +98,10 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
struct bch_fs *c,
struct printbuf *err)
{
- mutex_lock(&c->sb_lock);
+ int acquired = mutex_trylock(&c->sb_lock);
int ret = bch2_replicas_entry_validate_locked(r, c->disk_sb.sb, err);
- mutex_unlock(&c->sb_lock);
+ if (acquired)
+ mutex_unlock(&c->sb_lock);
return ret;
}
--
2.46.2
On Mon, Oct 07, 2024 at 08:55:49PM GMT, Piotr Zalewski wrote: > Use mutex_trylock instead of mutex_lock in bch2_replicas_entry_validate to > prevent potential deadlock[1]. > > [1] https://syzkaller.appspot.com/bug?extid=4d24267b490e2b68a5fa No good, skipping locking is not the right way to fix this. I fixed this with the below patch - we've got a different way of checking if a device exists when the filesystem is online. From bf4baaa087e2be0279991f1dbf9acaa7a4c9148c Mon Sep 17 00:00:00 2001 From: Kent Overstreet <kent.overstreet@linux.dev> Date: Sat, 5 Oct 2024 17:37:02 -0400 Subject: [PATCH] bcachefs: Fix lockdep splat in bch2_accounting_read We can't take sb_lock while holding mark_lock, so split out replicas_entry_validate() and replicas_entry_sb_validate() - replicas_entry_validate() now uses the normal online device interface. 00039 ========= TEST set_option 00039 00039 WATCHDOG 30 00040 bcachefs (vdb): starting version 1.12: rebalance_work_acct_fix opts=errors=panic 00040 bcachefs (vdb): initializing new filesystem 00040 bcachefs (vdb): going read-write 00040 bcachefs (vdb): marking superblocks 00040 bcachefs (vdb): initializing freespace 00040 bcachefs (vdb): done initializing freespace 00040 bcachefs (vdb): reading snapshots table 00040 bcachefs (vdb): reading snapshots done 00040 bcachefs (vdb): done starting filesystem 00040 zstd 00041 bcachefs (vdb): shutting down 00041 bcachefs (vdb): going read-only 00041 bcachefs (vdb): finished waiting for writes to stop 00041 bcachefs (vdb): flushing journal and stopping allocators, journal seq 3 00041 bcachefs (vdb): flushing journal and stopping allocators complete, journal seq 11 00041 bcachefs (vdb): shutdown complete, journal seq 12 00041 bcachefs (vdb): marking filesystem clean 00041 bcachefs (vdb): shutdown complete 00041 Setting option on offline fs 00041 bch2_write_super(): fatal error : attempting to write superblock that wasn't version downgraded (1.12: (unknown version) > 1.10: disk_accounting_v3) 00041 fatal error - emergency read only 00041 bch2_write_super(): fatal error : attempting to write superblock that wasn't version downgraded (1.12: (unknown version) > 1.10: disk_accounting_v3) 00042 bcachefs (vdb): starting version 1.12: rebalance_work_acct_fix opts=errors=panic,compression=zstd 00042 bcachefs (vdb): recovering from clean shutdown, journal seq 12 00042 bcachefs (vdb): accounting_read... 00042 00042 ====================================================== 00042 WARNING: possible circular locking dependency detected 00042 6.12.0-rc1-ktest-g805e938a8502 #6807 Not tainted 00042 ------------------------------------------------------ 00042 mount.bcachefs/665 is trying to acquire lock: 00045 ffffff80cc280908 (&c->sb_lock){+.+.}-{3:3}, at: bch2_replicas_entry_validate (fs/bcachefs/replicas.c:102) 00045 00045 but task is already holding lock: 00048 ffffff80cc284870 (&c->mark_lock){++++}-{0:0}, at: bch2_accounting_read (fs/bcachefs/disk_accounting.c:670 (discriminator 1)) 00048 00048 which lock already depends on the new lock. 00048 00048 00048 the existing dependency chain (in reverse order) is: 00048 00048 -> #1 (&c->mark_lock){++++}-{0:0}: 00049 percpu_down_write (kernel/locking/percpu-rwsem.c:232) 00052 bch2_sb_replicas_to_cpu_replicas (fs/bcachefs/replicas.c:583) 00055 bch2_sb_to_fs (fs/bcachefs/super-io.c:614) 00057 bch2_fs_open (fs/bcachefs/super.c:828 fs/bcachefs/super.c:2050) 00060 bch2_fs_get_tree (fs/bcachefs/fs.c:2067) 00062 vfs_get_tree (fs/super.c:1801) 00064 path_mount (fs/namespace.c:3507 fs/namespace.c:3834) 00066 __arm64_sys_mount (fs/namespace.c:3847 fs/namespace.c:4055 fs/namespace.c:4032 fs/namespace.c:4032) 00067 invoke_syscall.constprop.0 (arch/arm64/include/asm/syscall.h:61 arch/arm64/kernel/syscall.c:54) 00068 do_el0_svc (include/linux/thread_info.h:127 (discriminator 2) arch/arm64/kernel/syscall.c:140 (discriminator 2) arch/arm64/kernel/syscall.c:151 (discriminator 2)) 00069 el0_svc (arch/arm64/include/asm/irqflags.h:82 arch/arm64/include/asm/irqflags.h:123 arch/arm64/include/asm/irqflags.h:136 arch/arm64/kernel/entry-common.c:165 arch/arm64/kernel/entry-common.c:178 arch/arm64/kernel/entry-common.c:713) 00069 ========= FAILED TIMEOUT set_option in 30s Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev> diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c index bcb3276747e0..797da1032318 100644 --- a/fs/bcachefs/replicas.c +++ b/fs/bcachefs/replicas.c @@ -66,9 +66,9 @@ void bch2_replicas_entry_to_text(struct printbuf *out, prt_printf(out, "]"); } -static int bch2_replicas_entry_validate_locked(struct bch_replicas_entry_v1 *r, - struct bch_sb *sb, - struct printbuf *err) +static int bch2_replicas_entry_sb_validate(struct bch_replicas_entry_v1 *r, + struct bch_sb *sb, + struct printbuf *err) { if (!r->nr_devs) { prt_printf(err, "no devices in entry "); @@ -98,10 +98,28 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, struct bch_fs *c, struct printbuf *err) { - mutex_lock(&c->sb_lock); - int ret = bch2_replicas_entry_validate_locked(r, c->disk_sb.sb, err); - mutex_unlock(&c->sb_lock); - return ret; + if (!r->nr_devs) { + prt_printf(err, "no devices in entry "); + goto bad; + } + + if (r->nr_required > 1 && + r->nr_required >= r->nr_devs) { + prt_printf(err, "bad nr_required in entry "); + goto bad; + } + + for (unsigned i = 0; i < r->nr_devs; i++) + if (r->devs[i] != BCH_SB_MEMBER_INVALID && + !bch2_dev_exists(c, r->devs[i])) { + prt_printf(err, "invalid device %u in entry ", r->devs[i]); + goto bad; + } + + return 0; +bad: + bch2_replicas_entry_to_text(err, r); + return -BCH_ERR_invalid_replicas_entry; } void bch2_cpu_replicas_to_text(struct printbuf *out, @@ -686,7 +704,7 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, struct bch_replicas_entry_v1 *e = cpu_replicas_entry(cpu_r, i); - int ret = bch2_replicas_entry_validate_locked(e, sb, err); + int ret = bch2_replicas_entry_sb_validate(e, sb, err); if (ret) return ret;
© 2016 - 2024 Red Hat, Inc.