[v2] ocfs2: avoid potential ABBA deadlock by reordering tl_inode lock

[PATCH v2] ocfs2: avoid potential ABBA deadlock by reordering tl_inode lock

Posted by Ivan Pravdin 7 months ago

In ocfs2_move_extent(), tl_inode is currently locked after the global
bitmap inode. However, in ocfs2_flush_truncate_log(), the lock order
is reversed: tl_inode is locked first, followed by the global bitmap
inode.

This creates a classic ABBA deadlock scenario if two threads attempt
these operations concurrently and acquire the locks in different orders.

To prevent this, move the tl_inode locking earlier in
ocfs2_move_extent(), so that it always precedes the global bitmap
inode lock.

No functional changes beyond lock ordering.

Reported-by: syzbot+6bf948e47f9bac7aacfa@syzkaller.appspotmail.com
Closes: https://lore.kernel.org/all/67d5645c.050a0220.1dc86f.0004.GAE@google.com/
Signed-off-by: Ivan Pravdin <ipravdin.official@gmail.com>
---
v1 -> v2: Fixed unlocking order in ocfs2_move_extent.

 fs/ocfs2/move_extents.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 369c7d27befd..aaf8eb2693a4 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -617,6 +617,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 	 */
 	credits += OCFS2_INODE_UPDATE_CREDITS + 1;
 
+	inode_lock(tl_inode);
+
 	/*
 	 * ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
 	 * logic, while we still need to lock the global_bitmap.
@@ -637,13 +639,11 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 		goto out_unlock_gb_mutex;
 	}
 
-	inode_lock(tl_inode);
-
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		mlog_errno(ret);
-		goto out_unlock_tl_inode;
+		goto out_unlock_gb_inode;
 	}
 
 	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
@@ -704,12 +704,13 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 	ocfs2_commit_trans(osb, handle);
 	brelse(gd_bh);
 
-out_unlock_tl_inode:
-	inode_unlock(tl_inode);
-
+out_unlock_gb_inode:
 	ocfs2_inode_unlock(gb_inode, 1);
+
 out_unlock_gb_mutex:
 	inode_unlock(gb_inode);
+
+	inode_unlock(tl_inode);
 	brelse(gb_bh);
 	iput(gb_inode);
 
-- 
2.45.2

Re: [PATCH v2] ocfs2: avoid potential ABBA deadlock by reordering tl_inode lock

Posted by Joseph Qi 7 months ago


On 2025/7/7 11:29, Ivan Pravdin wrote:
> In ocfs2_move_extent(), tl_inode is currently locked after the global
> bitmap inode. However, in ocfs2_flush_truncate_log(), the lock order
> is reversed: tl_inode is locked first, followed by the global bitmap
> inode.
> 
> This creates a classic ABBA deadlock scenario if two threads attempt
> these operations concurrently and acquire the locks in different orders.
> 
> To prevent this, move the tl_inode locking earlier in
> ocfs2_move_extent(), so that it always precedes the global bitmap
> inode lock.
> 
> No functional changes beyond lock ordering.
> 
> Reported-by: syzbot+6bf948e47f9bac7aacfa@syzkaller.appspotmail.com
> Closes: https://lore.kernel.org/all/67d5645c.050a0220.1dc86f.0004.GAE@google.com/
> Signed-off-by: Ivan Pravdin <ipravdin.official@gmail.com>
> ---
> v1 -> v2: Fixed unlocking order in ocfs2_move_extent.
> 
>  fs/ocfs2/move_extents.c | 13 +++++++------
>  1 file changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
> index 369c7d27befd..aaf8eb2693a4 100644
> --- a/fs/ocfs2/move_extents.c
> +++ b/fs/ocfs2/move_extents.c
> @@ -617,6 +617,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
>  	 */
>  	credits += OCFS2_INODE_UPDATE_CREDITS + 1;
>  
> +	inode_lock(tl_inode);
> +
>  	/*
>  	 * ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
>  	 * logic, while we still need to lock the global_bitmap.
> @@ -637,13 +639,11 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
>  		goto out_unlock_gb_mutex;
>  	}
>

You've missed unlocking tl_inode when ocfs2_get_system_file_inode() fails.

  
> -	inode_lock(tl_inode);
> -
>  	handle = ocfs2_start_trans(osb, credits);
>  	if (IS_ERR(handle)) {
>  		ret = PTR_ERR(handle);
>  		mlog_errno(ret);
> -		goto out_unlock_tl_inode;
> +		goto out_unlock_gb_inode;
>  	}
>  
>  	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
> @@ -704,12 +704,13 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
>  	ocfs2_commit_trans(osb, handle);
>  	brelse(gd_bh);
>  
> -out_unlock_tl_inode:
> -	inode_unlock(tl_inode);
> -
> +out_unlock_gb_inode:
>  	ocfs2_inode_unlock(gb_inode, 1);
> +
>  out_unlock_gb_mutex:

The inode lock has been changed to an rw_semaphore, so 'mutex' in this label name is no longer accurate.

>  	inode_unlock(gb_inode);
> +
> +	inode_unlock(tl_inode);
>  	brelse(gb_bh);
>  	iput(gb_inode);
>  

How about the following alternative:

diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 369c7d27befd..d56c337204f6 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -611,6 +611,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 		goto out;
 	}
 
+	inode_lock(tl_inode);
+
 	/*
 	 * need to count 2 extra credits for global_bitmap inode and
 	 * group descriptor.
@@ -626,7 +628,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 	if (!gb_inode) {
 		mlog(ML_ERROR, "unable to get global_bitmap inode\n");
 		ret = -EIO;
-		goto out;
+		goto out_unlock_tl_inode;
 	}
 
 	inode_lock(gb_inode);
@@ -634,16 +636,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 	ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
 	if (ret) {
 		mlog_errno(ret);
-		goto out_unlock_gb_mutex;
+		goto out_unlock_gb_inode;
 	}
 
-	inode_lock(tl_inode);
-
 	handle = ocfs2_start_trans(osb, credits);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		mlog_errno(ret);
-		goto out_unlock_tl_inode;
+		goto out_unlock;
 	}
 
 	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
@@ -703,16 +703,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
 out_commit:
 	ocfs2_commit_trans(osb, handle);
 	brelse(gd_bh);
-
-out_unlock_tl_inode:
-	inode_unlock(tl_inode);
-
+out_unlock:
 	ocfs2_inode_unlock(gb_inode, 1);
-out_unlock_gb_mutex:
+out_unlock_gb_inode:
 	inode_unlock(gb_inode);
 	brelse(gb_bh);
 	iput(gb_inode);
-
+out_unlock_tl_inode:
+	inode_unlock(tl_inode);
 out:
 	if (context->meta_ac) {
 		ocfs2_free_alloc_context(context->meta_ac);

Re: [PATCH v2] ocfs2: avoid potential ABBA deadlock by reordering tl_inode lock

Posted by Ivan Pravdin 7 months ago

On Mon, Jul 07, 2025 at 01:58:35PM GMT, Joseph Qi wrote:
> >  	/*
> >  	 * ocfs2_move_extent() didn't reserve any clusters in lock_allocators()
> >  	 * logic, while we still need to lock the global_bitmap.
> > @@ -637,13 +639,11 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
> >  		goto out_unlock_gb_mutex;
> >  	}
> >
> 
> You've missed unlocking tl_inode when ocfs2_get_system_file_inode() fails.

I will add it, thanks.

> 
>   
> > -	inode_lock(tl_inode);
> > -
> >  	handle = ocfs2_start_trans(osb, credits);
> >  	if (IS_ERR(handle)) {
> >  		ret = PTR_ERR(handle);
> >  		mlog_errno(ret);
> > -		goto out_unlock_tl_inode;
> > +		goto out_unlock_gb_inode;
> >  	}
> >  
> >  	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
> > @@ -704,12 +704,13 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
> >  	ocfs2_commit_trans(osb, handle);
> >  	brelse(gd_bh);
> >  
> > -out_unlock_tl_inode:
> > -	inode_unlock(tl_inode);
> > -
> > +out_unlock_gb_inode:
> >  	ocfs2_inode_unlock(gb_inode, 1);
> > +
> >  out_unlock_gb_mutex:
> 
> The inode lock has been changed to an rw_semaphore, so 'mutex' in this label name is no longer accurate.
> 
> >  	inode_unlock(gb_inode);
> > +
> > +	inode_unlock(tl_inode);
> >  	brelse(gb_bh);
> >  	iput(gb_inode);
> >  
> 
> How about the following alternative:
> 
> diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
> index 369c7d27befd..d56c337204f6 100644
> --- a/fs/ocfs2/move_extents.c
> +++ b/fs/ocfs2/move_extents.c
> @@ -611,6 +611,8 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
>  		goto out;
>  	}
>  
> +	inode_lock(tl_inode);
> +
>  	/*
>  	 * need to count 2 extra credits for global_bitmap inode and
>  	 * group descriptor.
> @@ -626,7 +628,7 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
>  	if (!gb_inode) {
>  		mlog(ML_ERROR, "unable to get global_bitmap inode\n");
>  		ret = -EIO;
> -		goto out;
> +		goto out_unlock_tl_inode;
>  	}
>  
>  	inode_lock(gb_inode);
> @@ -634,16 +636,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
>  	ret = ocfs2_inode_lock(gb_inode, &gb_bh, 1);
>  	if (ret) {
>  		mlog_errno(ret);
> -		goto out_unlock_gb_mutex;
> +		goto out_unlock_gb_inode;
>  	}
>  
> -	inode_lock(tl_inode);
> -
>  	handle = ocfs2_start_trans(osb, credits);
>  	if (IS_ERR(handle)) {
>  		ret = PTR_ERR(handle);
>  		mlog_errno(ret);
> -		goto out_unlock_tl_inode;
> +		goto out_unlock;
>  	}
>  
>  	new_phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *new_phys_cpos);
> @@ -703,16 +703,14 @@ static int ocfs2_move_extent(struct ocfs2_move_extents_context *context,
>  out_commit:
>  	ocfs2_commit_trans(osb, handle);
>  	brelse(gd_bh);
> -
> -out_unlock_tl_inode:
> -	inode_unlock(tl_inode);
> -
> +out_unlock:
>  	ocfs2_inode_unlock(gb_inode, 1);
> -out_unlock_gb_mutex:
> +out_unlock_gb_inode:
>  	inode_unlock(gb_inode);
>  	brelse(gb_bh);
>  	iput(gb_inode);
> -
> +out_unlock_tl_inode:
> +	inode_unlock(tl_inode);
>  out:
>  	if (context->meta_ac) {
>  		ocfs2_free_alloc_context(context->meta_ac);
> 
> 

This is definitely better. I will update the patch, thank you.

	Ivan Pravdin