bcachefs: Fix deadlocks between fallocate and readahead

[PATCH] bcachefs: Fix deadlocks between fallocate and readahead

Posted by Deepanshu Kartikey 4 months, 1 week ago


There are ABBA deadlocks between fallocate and readahead operations
at two locations in __bchfs_fallocate():

Thread 1 (fallocate):
  bch2_fallocate_dispatch
    inode_lock(&inode->v)
    bch2_pagecache_block_get(inode)  // Acquires two_state_lock
      __bchfs_fallocate
        bch2_clamp_data_hole (or bch2_mark_pagecache_reserved)
          bch2_seek_pagecache_hole
            __filemap_get_folio
              folio_lock()  // BLOCKS - Thread 2 holds it

Thread 2 (readahead via copy_file_range):
  bch2_readahead
    folio_lock()  // Holds page lock
    __bch2_two_state_lock(&pagecache_lock)  // BLOCKS - Thread 1 holds it

The issue is that drop_locks_do() only releases btree transaction locks,
but Thread 2 is blocked waiting for the two_state_lock (pagecache_block)
held by bch2_pagecache_block_get().

Fix by explicitly releasing and re-acquiring the pagecache_block lock
around both blocking operations (bch2_clamp_data_hole and
bch2_mark_pagecache_reserved), following the same pattern used in
bch2_page_fault(). Force a transaction restart after lock release to
ensure consistency.

Reported-by: syzbot+cb91f22d8a581fc19edf@syzkaller.appspotmail.com
Tested-by: syzbot+cb91f22d8a581fc19edf@syzkaller.appspotmail.com
Link: https://syzkaller.appspot.com/bug?extid=cb91f22d8a581fc19edf
Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>
---
 fs/bcachefs/fs-io.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index a233f45875e9..66a60e5f03fc 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -694,13 +694,19 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 						 &hole_start,
 						 &hole_end,
 						 opts.data_replicas, true)) {
+				/* Release pagecache_block to prevent deadlock with readahead */
+				bch2_pagecache_block_put(inode);
 				ret = drop_locks_do(trans,
 					(bch2_clamp_data_hole(&inode->v,
 							      &hole_start,
 							      &hole_end,
 							      opts.data_replicas, false), 0));
+				bch2_pagecache_block_get(inode);
 				if (ret)
 					goto bkey_err;
+				/* Force transaction restart to revalidate state */
+				ret = -BCH_ERR_transaction_restart;
+				goto bkey_err;
 			}
 			bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, hole_start));
 
@@ -730,11 +736,17 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 
 		if (bch2_mark_pagecache_reserved(inode, &hole_start,
 						 iter.pos.offset, true)) {
+			/* Release pagecache_block to prevent deadlock */
+			bch2_pagecache_block_put(inode);
+
 			ret = drop_locks_do(trans,
 				bch2_mark_pagecache_reserved(inode, &hole_start,
 							     iter.pos.offset, false));
+			bch2_pagecache_block_get(inode);
 			if (ret)
 				goto bkey_err;
+			ret = -BCH_ERR_transaction_restart;
+			goto bkey_err;
 		}
 bkey_err:
 		bch2_quota_reservation_put(c, inode, &quota_res);
-- 
2.43.0

Re: [PATCH] bcachefs: Fix deadlocks between fallocate and readahead

Posted by Kent Overstreet 4 months, 1 week ago

On Sat, Sep 27, 2025 at 01:55:03PM +0530, Deepanshu Kartikey wrote:
> 
> There are ABBA deadlocks between fallocate and readahead operations
> at two locations in __bchfs_fallocate():
> 
> Thread 1 (fallocate):
>   bch2_fallocate_dispatch
>     inode_lock(&inode->v)
>     bch2_pagecache_block_get(inode)  // Acquires two_state_lock
>       __bchfs_fallocate
>         bch2_clamp_data_hole (or bch2_mark_pagecache_reserved)
>           bch2_seek_pagecache_hole
>             __filemap_get_folio
>               folio_lock()  // BLOCKS - Thread 2 holds it
> 
> Thread 2 (readahead via copy_file_range):
>   bch2_readahead
>     folio_lock()  // Holds page lock
>     __bch2_two_state_lock(&pagecache_lock)  // BLOCKS - Thread 1 holds it
> 
> The issue is that drop_locks_do() only releases btree transaction locks,
> but Thread 2 is blocked waiting for the two_state_lock (pagecache_block)
> held by bch2_pagecache_block_get().
> 
> Fix by explicitly releasing and re-acquiring the pagecache_block lock
> around both blocking operations (bch2_clamp_data_hole and
> bch2_mark_pagecache_reserved), following the same pattern used in
> bch2_page_fault(). Force a transaction restart after lock release to
> ensure consistency.
> 
> Reported-by: syzbot+cb91f22d8a581fc19edf@syzkaller.appspotmail.com
> Tested-by: syzbot+cb91f22d8a581fc19edf@syzkaller.appspotmail.com
> Link: https://syzkaller.appspot.com/bug?extid=cb91f22d8a581fc19edf
> Signed-off-by: Deepanshu Kartikey <Kartikey406@gmail.com>

Nice find... we still don't have lockdep support for pagecache add lock,
there was one last bit preventing me from applying the patch last I was
working on that.

If you want to join the IRC channel, this is one a couple of us might
want to chew on. Your commit message is good, I'll probably apply it
after it's not 3 am, but this'll be a good one to talk about.

(irc.oftc.net #bcache, and for this one the secret #bcachefs-dev)

> ---
>  fs/bcachefs/fs-io.c | 12 ++++++++++++
>  1 file changed, 12 insertions(+)
> 
> diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
> index a233f45875e9..66a60e5f03fc 100644
> --- a/fs/bcachefs/fs-io.c
> +++ b/fs/bcachefs/fs-io.c
> @@ -694,13 +694,19 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
>  						 &hole_start,
>  						 &hole_end,
>  						 opts.data_replicas, true)) {
> +				/* Release pagecache_block to prevent deadlock with readahead */
> +				bch2_pagecache_block_put(inode);
>  				ret = drop_locks_do(trans,
>  					(bch2_clamp_data_hole(&inode->v,
>  							      &hole_start,
>  							      &hole_end,
>  							      opts.data_replicas, false), 0));
> +				bch2_pagecache_block_get(inode);
>  				if (ret)
>  					goto bkey_err;
> +				/* Force transaction restart to revalidate state */
> +				ret = -BCH_ERR_transaction_restart;
> +				goto bkey_err;
>  			}
>  			bch2_btree_iter_set_pos(trans, &iter, POS(iter.pos.inode, hole_start));
>  
> @@ -730,11 +736,17 @@ static noinline int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
>  
>  		if (bch2_mark_pagecache_reserved(inode, &hole_start,
>  						 iter.pos.offset, true)) {
> +			/* Release pagecache_block to prevent deadlock */
> +			bch2_pagecache_block_put(inode);
> +
>  			ret = drop_locks_do(trans,
>  				bch2_mark_pagecache_reserved(inode, &hole_start,
>  							     iter.pos.offset, false));
> +			bch2_pagecache_block_get(inode);
>  			if (ret)
>  				goto bkey_err;
> +			ret = -BCH_ERR_transaction_restart;
> +			goto bkey_err;
>  		}
>  bkey_err:
>  		bch2_quota_reservation_put(c, inode, &quota_res);
> -- 
> 2.43.0
>