[PATCH v4 2/2] jbd2: gracefully abort on transaction state corruptions

Milos Nikic posted 2 patches 1 month ago
There is a newer version of this series
[PATCH v4 2/2] jbd2: gracefully abort on transaction state corruptions
Posted by Milos Nikic 1 month ago
Auditing the jbd2 codebase reveals several legacy J_ASSERT calls
that enforce internal state machine invariants (e.g., verifying
jh->b_transaction or jh->b_next_transaction pointers).

When these invariants are broken, the journal is in a corrupted
state. However, triggering a fatal panic brings down the entire
system for a localized filesystem error.

This patch targets a specific class of these asserts: those
residing inside functions that natively return integer error codes,
booleans, or error pointers. It replaces the hard J_ASSERTs with
WARN_ON_ONCE to capture the offending stack trace, safely drops
any held locks, gracefully aborts the journal, and returns -EINVAL.

This prevents a catastrophic kernel panic while ensuring the
corrupted journal state is safely contained and upstream callers
(like ext4 or ocfs2) can gracefully handle the aborted handle.

Functions modified in fs/jbd2/transaction.c:
- jbd2__journal_start()
- do_get_write_access()
- jbd2_journal_dirty_metadata()
- jbd2_journal_forget()
- jbd2_journal_try_to_free_buffers()
- jbd2_journal_file_inode()

Signed-off-by: Milos Nikic <nikic.milos@gmail.com>
---
 fs/jbd2/transaction.c | 112 ++++++++++++++++++++++++++++++++----------
 1 file changed, 86 insertions(+), 26 deletions(-)

diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 04d17a5f2a82..bae6c99d635c 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -474,7 +474,8 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
 		return ERR_PTR(-EROFS);
 
 	if (handle) {
-		J_ASSERT(handle->h_transaction->t_journal == journal);
+		if (WARN_ON_ONCE(handle->h_transaction->t_journal != journal))
+			return ERR_PTR(-EINVAL);
 		handle->h_ref++;
 		return handle;
 	}
@@ -1036,7 +1037,13 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	 */
 	if (!jh->b_transaction) {
 		JBUFFER_TRACE(jh, "no transaction");
-		J_ASSERT_JH(jh, !jh->b_next_transaction);
+		if (WARN_ON_ONCE(jh->b_next_transaction)) {
+			spin_unlock(&jh->b_state_lock);
+			unlock_buffer(bh);
+			error = -EINVAL;
+			jbd2_journal_abort(journal, error);
+			goto out;
+		}
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
 		/*
 		 * Make sure all stores to jh (b_modified, b_frozen_data) are
@@ -1069,13 +1076,27 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	 */
 	if (jh->b_frozen_data) {
 		JBUFFER_TRACE(jh, "has frozen data");
-		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+		if (WARN_ON_ONCE(jh->b_next_transaction)) {
+			spin_unlock(&jh->b_state_lock);
+			error = -EINVAL;
+			jbd2_journal_abort(journal, error);
+			goto out;
+		}
 		goto attach_next;
 	}
 
 	JBUFFER_TRACE(jh, "owned by older transaction");
-	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
-	J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
+	if (WARN_ON_ONCE(jh->b_next_transaction ||
+			 jh->b_transaction !=
+			 journal->j_committing_transaction)) {
+		pr_err("JBD2: %s: assertion failure: b_next_transaction=%p b_transaction=%p j_committing_transaction=%p\n",
+		       journal->j_devname, jh->b_next_transaction,
+		       jh->b_transaction, journal->j_committing_transaction);
+		spin_unlock(&jh->b_state_lock);
+		error = -EINVAL;
+		jbd2_journal_abort(journal, error);
+		goto out;
+	}
 
 	/*
 	 * There is one case we have to be very careful about.  If the
@@ -1496,7 +1517,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
 int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 {
 	transaction_t *transaction = handle->h_transaction;
-	journal_t *journal;
+	journal_t *journal = transaction->t_journal;
 	struct journal_head *jh;
 	int ret = 0;
 
@@ -1520,8 +1541,14 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	if (data_race(jh->b_transaction != transaction &&
 	    jh->b_next_transaction != transaction)) {
 		spin_lock(&jh->b_state_lock);
-		J_ASSERT_JH(jh, jh->b_transaction == transaction ||
-				jh->b_next_transaction == transaction);
+		if (WARN_ON_ONCE(jh->b_transaction != transaction &&
+				 jh->b_next_transaction != transaction)) {
+			pr_err("JBD2: %s: assertion failure: b_transaction=%p transaction=%p b_next_transaction=%p\n",
+			       journal->j_devname, jh->b_transaction,
+			       transaction, jh->b_next_transaction);
+			ret = -EINVAL;
+			goto out_unlock_bh;
+		}
 		spin_unlock(&jh->b_state_lock);
 	}
 	if (data_race(jh->b_modified == 1)) {
@@ -1531,13 +1558,15 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 			spin_lock(&jh->b_state_lock);
 			if (jh->b_transaction == transaction &&
 			    jh->b_jlist != BJ_Metadata)
-				pr_err("JBD2: assertion failure: h_type=%u "
-				       "h_line_no=%u block_no=%llu jlist=%u\n",
+				pr_err("JBD2: assertion failure: h_type=%u h_line_no=%u block_no=%llu jlist=%u\n",
 				       handle->h_type, handle->h_line_no,
 				       (unsigned long long) bh->b_blocknr,
 				       jh->b_jlist);
-			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
-					jh->b_jlist == BJ_Metadata);
+			if (WARN_ON_ONCE(jh->b_transaction == transaction &&
+					 jh->b_jlist != BJ_Metadata)) {
+				ret = -EINVAL;
+				goto out_unlock_bh;
+			}
 			spin_unlock(&jh->b_state_lock);
 		}
 		goto out;
@@ -1557,8 +1586,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 		goto out_unlock_bh;
 	}
 
-	journal = transaction->t_journal;
-
 	if (jh->b_modified == 0) {
 		/*
 		 * This buffer's got modified and becoming part
@@ -1636,7 +1663,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
 	}
 
 	/* That test should have eliminated the following case: */
-	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
+	if (WARN_ON_ONCE(jh->b_frozen_data)) {
+		ret = -EINVAL;
+		goto out_unlock_bh;
+	}
 
 	JBUFFER_TRACE(jh, "file as BJ_Metadata");
 	spin_lock(&journal->j_list_lock);
@@ -1675,6 +1705,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
 	int err = 0;
 	int was_modified = 0;
 	int wait_for_writeback = 0;
+	int abort_journal = 0;
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
@@ -1708,7 +1739,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
 	jh->b_modified = 0;
 
 	if (jh->b_transaction == transaction) {
-		J_ASSERT_JH(jh, !jh->b_frozen_data);
+		if (WARN_ON_ONCE(jh->b_frozen_data)) {
+			err = -EINVAL;
+			abort_journal = 1;
+			goto drop;
+		}
 
 		/* If we are forgetting a buffer which is already part
 		 * of this transaction, then we can just drop it from
@@ -1747,8 +1782,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
 		}
 		spin_unlock(&journal->j_list_lock);
 	} else if (jh->b_transaction) {
-		J_ASSERT_JH(jh, (jh->b_transaction ==
-				 journal->j_committing_transaction));
+		if (WARN_ON_ONCE(jh->b_transaction != journal->j_committing_transaction)) {
+			err = -EINVAL;
+			abort_journal = 1;
+			goto drop;
+		}
 		/* However, if the buffer is still owned by a prior
 		 * (committing) transaction, we can't drop it yet... */
 		JBUFFER_TRACE(jh, "belongs to older transaction");
@@ -1766,7 +1804,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
 			jh->b_next_transaction = transaction;
 			spin_unlock(&journal->j_list_lock);
 		} else {
-			J_ASSERT(jh->b_next_transaction == transaction);
+			if (WARN_ON_ONCE(jh->b_next_transaction != transaction)) {
+				err = -EINVAL;
+				abort_journal = 1;
+				goto drop;
+			}
 
 			/*
 			 * only drop a reference if this transaction modified
@@ -1812,6 +1854,8 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
 drop:
 	__brelse(bh);
 	spin_unlock(&jh->b_state_lock);
+	if (abort_journal)
+		jbd2_journal_abort(journal, err);
 	if (wait_for_writeback)
 		wait_on_buffer(bh);
 	jbd2_journal_put_journal_head(jh);
@@ -2136,7 +2180,8 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
 	struct buffer_head *bh;
 	bool ret = false;
 
-	J_ASSERT(folio_test_locked(folio));
+	if (WARN_ON_ONCE(!folio_test_locked(folio)))
+		return false;
 
 	head = folio_buffers(folio);
 	bh = head;
@@ -2651,6 +2696,8 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 {
 	transaction_t *transaction = handle->h_transaction;
 	journal_t *journal;
+	int err = 0;
+	int abort_transaction = 0;
 
 	if (is_handle_aborted(handle))
 		return -EROFS;
@@ -2685,20 +2732,33 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
 	/* On some different transaction's list - should be
 	 * the committing one */
 	if (jinode->i_transaction) {
-		J_ASSERT(jinode->i_next_transaction == NULL);
-		J_ASSERT(jinode->i_transaction ==
-					journal->j_committing_transaction);
+		if (WARN_ON_ONCE(jinode->i_next_transaction ||
+				 jinode->i_transaction !=
+				 journal->j_committing_transaction)) {
+			pr_err("JBD2: %s: assertion failure: i_next_transaction=%p i_transaction=%p j_committing_transaction=%p\n",
+			       journal->j_devname, jinode->i_next_transaction,
+			       jinode->i_transaction,
+			       journal->j_committing_transaction);
+			err = -EINVAL;
+			abort_transaction = 1;
+			goto done;
+		}
 		jinode->i_next_transaction = transaction;
 		goto done;
 	}
 	/* Not on any transaction list... */
-	J_ASSERT(!jinode->i_next_transaction);
+	if (WARN_ON_ONCE(jinode->i_next_transaction)) {
+		err = -EINVAL;
+		abort_transaction = 1;
+		goto done;
+	}
 	jinode->i_transaction = transaction;
 	list_add(&jinode->i_list, &transaction->t_inode_list);
 done:
 	spin_unlock(&journal->j_list_lock);
-
-	return 0;
+	if (abort_transaction)
+		jbd2_journal_abort(journal, err);
+	return err;
 }
 
 int jbd2_journal_inode_ranged_write(handle_t *handle,
-- 
2.53.0
Re: [PATCH v4 2/2] jbd2: gracefully abort on transaction state corruptions
Posted by Zhang Yi 1 month ago
On 3/4/2026 2:01 AM, Milos Nikic wrote:
> Auditing the jbd2 codebase reveals several legacy J_ASSERT calls
> that enforce internal state machine invariants (e.g., verifying
> jh->b_transaction or jh->b_next_transaction pointers).
> 
> When these invariants are broken, the journal is in a corrupted
> state. However, triggering a fatal panic brings down the entire
> system for a localized filesystem error.
> 
> This patch targets a specific class of these asserts: those
> residing inside functions that natively return integer error codes,
> booleans, or error pointers. It replaces the hard J_ASSERTs with
> WARN_ON_ONCE to capture the offending stack trace, safely drops
> any held locks, gracefully aborts the journal, and returns -EINVAL.
> 
> This prevents a catastrophic kernel panic while ensuring the
> corrupted journal state is safely contained and upstream callers
> (like ext4 or ocfs2) can gracefully handle the aborted handle.
> 
> Functions modified in fs/jbd2/transaction.c:
> - jbd2__journal_start()
> - do_get_write_access()
> - jbd2_journal_dirty_metadata()
> - jbd2_journal_forget()
> - jbd2_journal_try_to_free_buffers()
> - jbd2_journal_file_inode()
> 
> Signed-off-by: Milos Nikic <nikic.milos@gmail.com>

Thank you for the improvement, this looks good to me.

Reviewed-by: Zhang Yi <yi.zhang@huawei.com>

> ---
>   fs/jbd2/transaction.c | 112 ++++++++++++++++++++++++++++++++----------
>   1 file changed, 86 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index 04d17a5f2a82..bae6c99d635c 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -474,7 +474,8 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
>   		return ERR_PTR(-EROFS);
>   
>   	if (handle) {
> -		J_ASSERT(handle->h_transaction->t_journal == journal);
> +		if (WARN_ON_ONCE(handle->h_transaction->t_journal != journal))
> +			return ERR_PTR(-EINVAL);
>   		handle->h_ref++;
>   		return handle;
>   	}
> @@ -1036,7 +1037,13 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
>   	 */
>   	if (!jh->b_transaction) {
>   		JBUFFER_TRACE(jh, "no transaction");
> -		J_ASSERT_JH(jh, !jh->b_next_transaction);
> +		if (WARN_ON_ONCE(jh->b_next_transaction)) {
> +			spin_unlock(&jh->b_state_lock);
> +			unlock_buffer(bh);
> +			error = -EINVAL;
> +			jbd2_journal_abort(journal, error);
> +			goto out;
> +		}
>   		JBUFFER_TRACE(jh, "file as BJ_Reserved");
>   		/*
>   		 * Make sure all stores to jh (b_modified, b_frozen_data) are
> @@ -1069,13 +1076,27 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
>   	 */
>   	if (jh->b_frozen_data) {
>   		JBUFFER_TRACE(jh, "has frozen data");
> -		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
> +		if (WARN_ON_ONCE(jh->b_next_transaction)) {
> +			spin_unlock(&jh->b_state_lock);
> +			error = -EINVAL;
> +			jbd2_journal_abort(journal, error);
> +			goto out;
> +		}
>   		goto attach_next;
>   	}
>   
>   	JBUFFER_TRACE(jh, "owned by older transaction");
> -	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
> -	J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
> +	if (WARN_ON_ONCE(jh->b_next_transaction ||
> +			 jh->b_transaction !=
> +			 journal->j_committing_transaction)) {
> +		pr_err("JBD2: %s: assertion failure: b_next_transaction=%p b_transaction=%p j_committing_transaction=%p\n",
> +		       journal->j_devname, jh->b_next_transaction,
> +		       jh->b_transaction, journal->j_committing_transaction);
> +		spin_unlock(&jh->b_state_lock);
> +		error = -EINVAL;
> +		jbd2_journal_abort(journal, error);
> +		goto out;
> +	}
>   
>   	/*
>   	 * There is one case we have to be very careful about.  If the
> @@ -1496,7 +1517,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
>   int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>   {
>   	transaction_t *transaction = handle->h_transaction;
> -	journal_t *journal;
> +	journal_t *journal = transaction->t_journal;
>   	struct journal_head *jh;
>   	int ret = 0;
>   
> @@ -1520,8 +1541,14 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>   	if (data_race(jh->b_transaction != transaction &&
>   	    jh->b_next_transaction != transaction)) {
>   		spin_lock(&jh->b_state_lock);
> -		J_ASSERT_JH(jh, jh->b_transaction == transaction ||
> -				jh->b_next_transaction == transaction);
> +		if (WARN_ON_ONCE(jh->b_transaction != transaction &&
> +				 jh->b_next_transaction != transaction)) {
> +			pr_err("JBD2: %s: assertion failure: b_transaction=%p transaction=%p b_next_transaction=%p\n",
> +			       journal->j_devname, jh->b_transaction,
> +			       transaction, jh->b_next_transaction);
> +			ret = -EINVAL;
> +			goto out_unlock_bh;
> +		}
>   		spin_unlock(&jh->b_state_lock);
>   	}
>   	if (data_race(jh->b_modified == 1)) {
> @@ -1531,13 +1558,15 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>   			spin_lock(&jh->b_state_lock);
>   			if (jh->b_transaction == transaction &&
>   			    jh->b_jlist != BJ_Metadata)
> -				pr_err("JBD2: assertion failure: h_type=%u "
> -				       "h_line_no=%u block_no=%llu jlist=%u\n",
> +				pr_err("JBD2: assertion failure: h_type=%u h_line_no=%u block_no=%llu jlist=%u\n",
>   				       handle->h_type, handle->h_line_no,
>   				       (unsigned long long) bh->b_blocknr,
>   				       jh->b_jlist);
> -			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
> -					jh->b_jlist == BJ_Metadata);
> +			if (WARN_ON_ONCE(jh->b_transaction == transaction &&
> +					 jh->b_jlist != BJ_Metadata)) {
> +				ret = -EINVAL;
> +				goto out_unlock_bh;
> +			}
>   			spin_unlock(&jh->b_state_lock);
>   		}
>   		goto out;
> @@ -1557,8 +1586,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>   		goto out_unlock_bh;
>   	}
>   
> -	journal = transaction->t_journal;
> -
>   	if (jh->b_modified == 0) {
>   		/*
>   		 * This buffer's got modified and becoming part
> @@ -1636,7 +1663,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>   	}
>   
>   	/* That test should have eliminated the following case: */
> -	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
> +	if (WARN_ON_ONCE(jh->b_frozen_data)) {
> +		ret = -EINVAL;
> +		goto out_unlock_bh;
> +	}
>   
>   	JBUFFER_TRACE(jh, "file as BJ_Metadata");
>   	spin_lock(&journal->j_list_lock);
> @@ -1675,6 +1705,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>   	int err = 0;
>   	int was_modified = 0;
>   	int wait_for_writeback = 0;
> +	int abort_journal = 0;
>   
>   	if (is_handle_aborted(handle))
>   		return -EROFS;
> @@ -1708,7 +1739,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>   	jh->b_modified = 0;
>   
>   	if (jh->b_transaction == transaction) {
> -		J_ASSERT_JH(jh, !jh->b_frozen_data);
> +		if (WARN_ON_ONCE(jh->b_frozen_data)) {
> +			err = -EINVAL;
> +			abort_journal = 1;
> +			goto drop;
> +		}
>   
>   		/* If we are forgetting a buffer which is already part
>   		 * of this transaction, then we can just drop it from
> @@ -1747,8 +1782,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>   		}
>   		spin_unlock(&journal->j_list_lock);
>   	} else if (jh->b_transaction) {
> -		J_ASSERT_JH(jh, (jh->b_transaction ==
> -				 journal->j_committing_transaction));
> +		if (WARN_ON_ONCE(jh->b_transaction != journal->j_committing_transaction)) {
> +			err = -EINVAL;
> +			abort_journal = 1;
> +			goto drop;
> +		}
>   		/* However, if the buffer is still owned by a prior
>   		 * (committing) transaction, we can't drop it yet... */
>   		JBUFFER_TRACE(jh, "belongs to older transaction");
> @@ -1766,7 +1804,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>   			jh->b_next_transaction = transaction;
>   			spin_unlock(&journal->j_list_lock);
>   		} else {
> -			J_ASSERT(jh->b_next_transaction == transaction);
> +			if (WARN_ON_ONCE(jh->b_next_transaction != transaction)) {
> +				err = -EINVAL;
> +				abort_journal = 1;
> +				goto drop;
> +			}
>   
>   			/*
>   			 * only drop a reference if this transaction modified
> @@ -1812,6 +1854,8 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>   drop:
>   	__brelse(bh);
>   	spin_unlock(&jh->b_state_lock);
> +	if (abort_journal)
> +		jbd2_journal_abort(journal, err);
>   	if (wait_for_writeback)
>   		wait_on_buffer(bh);
>   	jbd2_journal_put_journal_head(jh);
> @@ -2136,7 +2180,8 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
>   	struct buffer_head *bh;
>   	bool ret = false;
>   
> -	J_ASSERT(folio_test_locked(folio));
> +	if (WARN_ON_ONCE(!folio_test_locked(folio)))
> +		return false;
>   
>   	head = folio_buffers(folio);
>   	bh = head;
> @@ -2651,6 +2696,8 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
>   {
>   	transaction_t *transaction = handle->h_transaction;
>   	journal_t *journal;
> +	int err = 0;
> +	int abort_transaction = 0;
>   
>   	if (is_handle_aborted(handle))
>   		return -EROFS;
> @@ -2685,20 +2732,33 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
>   	/* On some different transaction's list - should be
>   	 * the committing one */
>   	if (jinode->i_transaction) {
> -		J_ASSERT(jinode->i_next_transaction == NULL);
> -		J_ASSERT(jinode->i_transaction ==
> -					journal->j_committing_transaction);
> +		if (WARN_ON_ONCE(jinode->i_next_transaction ||
> +				 jinode->i_transaction !=
> +				 journal->j_committing_transaction)) {
> +			pr_err("JBD2: %s: assertion failure: i_next_transaction=%p i_transaction=%p j_committing_transaction=%p\n",
> +			       journal->j_devname, jinode->i_next_transaction,
> +			       jinode->i_transaction,
> +			       journal->j_committing_transaction);
> +			err = -EINVAL;
> +			abort_transaction = 1;
> +			goto done;
> +		}
>   		jinode->i_next_transaction = transaction;
>   		goto done;
>   	}
>   	/* Not on any transaction list... */
> -	J_ASSERT(!jinode->i_next_transaction);
> +	if (WARN_ON_ONCE(jinode->i_next_transaction)) {
> +		err = -EINVAL;
> +		abort_transaction = 1;
> +		goto done;
> +	}
>   	jinode->i_transaction = transaction;
>   	list_add(&jinode->i_list, &transaction->t_inode_list);
>   done:
>   	spin_unlock(&journal->j_list_lock);
> -
> -	return 0;
> +	if (abort_transaction)
> +		jbd2_journal_abort(journal, err);
> +	return err;
>   }
>   
>   int jbd2_journal_inode_ranged_write(handle_t *handle,
Re: [PATCH v4 2/2] jbd2: gracefully abort on transaction state corruptions
Posted by Jan Kara 1 month ago
On Tue 03-03-26 10:01:57, Milos Nikic wrote:
> Auditing the jbd2 codebase reveals several legacy J_ASSERT calls
> that enforce internal state machine invariants (e.g., verifying
> jh->b_transaction or jh->b_next_transaction pointers).
> 
> When these invariants are broken, the journal is in a corrupted
> state. However, triggering a fatal panic brings down the entire
> system for a localized filesystem error.
> 
> This patch targets a specific class of these asserts: those
> residing inside functions that natively return integer error codes,
> booleans, or error pointers. It replaces the hard J_ASSERTs with
> WARN_ON_ONCE to capture the offending stack trace, safely drops
> any held locks, gracefully aborts the journal, and returns -EINVAL.
> 
> This prevents a catastrophic kernel panic while ensuring the
> corrupted journal state is safely contained and upstream callers
> (like ext4 or ocfs2) can gracefully handle the aborted handle.
> 
> Functions modified in fs/jbd2/transaction.c:
> - jbd2__journal_start()
> - do_get_write_access()
> - jbd2_journal_dirty_metadata()
> - jbd2_journal_forget()
> - jbd2_journal_try_to_free_buffers()
> - jbd2_journal_file_inode()
> 
> Signed-off-by: Milos Nikic <nikic.milos@gmail.com>

Looks good. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

								Honza

> ---
>  fs/jbd2/transaction.c | 112 ++++++++++++++++++++++++++++++++----------
>  1 file changed, 86 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
> index 04d17a5f2a82..bae6c99d635c 100644
> --- a/fs/jbd2/transaction.c
> +++ b/fs/jbd2/transaction.c
> @@ -474,7 +474,8 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks,
>  		return ERR_PTR(-EROFS);
>  
>  	if (handle) {
> -		J_ASSERT(handle->h_transaction->t_journal == journal);
> +		if (WARN_ON_ONCE(handle->h_transaction->t_journal != journal))
> +			return ERR_PTR(-EINVAL);
>  		handle->h_ref++;
>  		return handle;
>  	}
> @@ -1036,7 +1037,13 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
>  	 */
>  	if (!jh->b_transaction) {
>  		JBUFFER_TRACE(jh, "no transaction");
> -		J_ASSERT_JH(jh, !jh->b_next_transaction);
> +		if (WARN_ON_ONCE(jh->b_next_transaction)) {
> +			spin_unlock(&jh->b_state_lock);
> +			unlock_buffer(bh);
> +			error = -EINVAL;
> +			jbd2_journal_abort(journal, error);
> +			goto out;
> +		}
>  		JBUFFER_TRACE(jh, "file as BJ_Reserved");
>  		/*
>  		 * Make sure all stores to jh (b_modified, b_frozen_data) are
> @@ -1069,13 +1076,27 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
>  	 */
>  	if (jh->b_frozen_data) {
>  		JBUFFER_TRACE(jh, "has frozen data");
> -		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
> +		if (WARN_ON_ONCE(jh->b_next_transaction)) {
> +			spin_unlock(&jh->b_state_lock);
> +			error = -EINVAL;
> +			jbd2_journal_abort(journal, error);
> +			goto out;
> +		}
>  		goto attach_next;
>  	}
>  
>  	JBUFFER_TRACE(jh, "owned by older transaction");
> -	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
> -	J_ASSERT_JH(jh, jh->b_transaction == journal->j_committing_transaction);
> +	if (WARN_ON_ONCE(jh->b_next_transaction ||
> +			 jh->b_transaction !=
> +			 journal->j_committing_transaction)) {
> +		pr_err("JBD2: %s: assertion failure: b_next_transaction=%p b_transaction=%p j_committing_transaction=%p\n",
> +		       journal->j_devname, jh->b_next_transaction,
> +		       jh->b_transaction, journal->j_committing_transaction);
> +		spin_unlock(&jh->b_state_lock);
> +		error = -EINVAL;
> +		jbd2_journal_abort(journal, error);
> +		goto out;
> +	}
>  
>  	/*
>  	 * There is one case we have to be very careful about.  If the
> @@ -1496,7 +1517,7 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh,
>  int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>  {
>  	transaction_t *transaction = handle->h_transaction;
> -	journal_t *journal;
> +	journal_t *journal = transaction->t_journal;
>  	struct journal_head *jh;
>  	int ret = 0;
>  
> @@ -1520,8 +1541,14 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>  	if (data_race(jh->b_transaction != transaction &&
>  	    jh->b_next_transaction != transaction)) {
>  		spin_lock(&jh->b_state_lock);
> -		J_ASSERT_JH(jh, jh->b_transaction == transaction ||
> -				jh->b_next_transaction == transaction);
> +		if (WARN_ON_ONCE(jh->b_transaction != transaction &&
> +				 jh->b_next_transaction != transaction)) {
> +			pr_err("JBD2: %s: assertion failure: b_transaction=%p transaction=%p b_next_transaction=%p\n",
> +			       journal->j_devname, jh->b_transaction,
> +			       transaction, jh->b_next_transaction);
> +			ret = -EINVAL;
> +			goto out_unlock_bh;
> +		}
>  		spin_unlock(&jh->b_state_lock);
>  	}
>  	if (data_race(jh->b_modified == 1)) {
> @@ -1531,13 +1558,15 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>  			spin_lock(&jh->b_state_lock);
>  			if (jh->b_transaction == transaction &&
>  			    jh->b_jlist != BJ_Metadata)
> -				pr_err("JBD2: assertion failure: h_type=%u "
> -				       "h_line_no=%u block_no=%llu jlist=%u\n",
> +				pr_err("JBD2: assertion failure: h_type=%u h_line_no=%u block_no=%llu jlist=%u\n",
>  				       handle->h_type, handle->h_line_no,
>  				       (unsigned long long) bh->b_blocknr,
>  				       jh->b_jlist);
> -			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
> -					jh->b_jlist == BJ_Metadata);
> +			if (WARN_ON_ONCE(jh->b_transaction == transaction &&
> +					 jh->b_jlist != BJ_Metadata)) {
> +				ret = -EINVAL;
> +				goto out_unlock_bh;
> +			}
>  			spin_unlock(&jh->b_state_lock);
>  		}
>  		goto out;
> @@ -1557,8 +1586,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>  		goto out_unlock_bh;
>  	}
>  
> -	journal = transaction->t_journal;
> -
>  	if (jh->b_modified == 0) {
>  		/*
>  		 * This buffer's got modified and becoming part
> @@ -1636,7 +1663,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>  	}
>  
>  	/* That test should have eliminated the following case: */
> -	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);
> +	if (WARN_ON_ONCE(jh->b_frozen_data)) {
> +		ret = -EINVAL;
> +		goto out_unlock_bh;
> +	}
>  
>  	JBUFFER_TRACE(jh, "file as BJ_Metadata");
>  	spin_lock(&journal->j_list_lock);
> @@ -1675,6 +1705,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>  	int err = 0;
>  	int was_modified = 0;
>  	int wait_for_writeback = 0;
> +	int abort_journal = 0;
>  
>  	if (is_handle_aborted(handle))
>  		return -EROFS;
> @@ -1708,7 +1739,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>  	jh->b_modified = 0;
>  
>  	if (jh->b_transaction == transaction) {
> -		J_ASSERT_JH(jh, !jh->b_frozen_data);
> +		if (WARN_ON_ONCE(jh->b_frozen_data)) {
> +			err = -EINVAL;
> +			abort_journal = 1;
> +			goto drop;
> +		}
>  
>  		/* If we are forgetting a buffer which is already part
>  		 * of this transaction, then we can just drop it from
> @@ -1747,8 +1782,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>  		}
>  		spin_unlock(&journal->j_list_lock);
>  	} else if (jh->b_transaction) {
> -		J_ASSERT_JH(jh, (jh->b_transaction ==
> -				 journal->j_committing_transaction));
> +		if (WARN_ON_ONCE(jh->b_transaction != journal->j_committing_transaction)) {
> +			err = -EINVAL;
> +			abort_journal = 1;
> +			goto drop;
> +		}
>  		/* However, if the buffer is still owned by a prior
>  		 * (committing) transaction, we can't drop it yet... */
>  		JBUFFER_TRACE(jh, "belongs to older transaction");
> @@ -1766,7 +1804,11 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>  			jh->b_next_transaction = transaction;
>  			spin_unlock(&journal->j_list_lock);
>  		} else {
> -			J_ASSERT(jh->b_next_transaction == transaction);
> +			if (WARN_ON_ONCE(jh->b_next_transaction != transaction)) {
> +				err = -EINVAL;
> +				abort_journal = 1;
> +				goto drop;
> +			}
>  
>  			/*
>  			 * only drop a reference if this transaction modified
> @@ -1812,6 +1854,8 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
>  drop:
>  	__brelse(bh);
>  	spin_unlock(&jh->b_state_lock);
> +	if (abort_journal)
> +		jbd2_journal_abort(journal, err);
>  	if (wait_for_writeback)
>  		wait_on_buffer(bh);
>  	jbd2_journal_put_journal_head(jh);
> @@ -2136,7 +2180,8 @@ bool jbd2_journal_try_to_free_buffers(journal_t *journal, struct folio *folio)
>  	struct buffer_head *bh;
>  	bool ret = false;
>  
> -	J_ASSERT(folio_test_locked(folio));
> +	if (WARN_ON_ONCE(!folio_test_locked(folio)))
> +		return false;
>  
>  	head = folio_buffers(folio);
>  	bh = head;
> @@ -2651,6 +2696,8 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
>  {
>  	transaction_t *transaction = handle->h_transaction;
>  	journal_t *journal;
> +	int err = 0;
> +	int abort_transaction = 0;
>  
>  	if (is_handle_aborted(handle))
>  		return -EROFS;
> @@ -2685,20 +2732,33 @@ static int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode,
>  	/* On some different transaction's list - should be
>  	 * the committing one */
>  	if (jinode->i_transaction) {
> -		J_ASSERT(jinode->i_next_transaction == NULL);
> -		J_ASSERT(jinode->i_transaction ==
> -					journal->j_committing_transaction);
> +		if (WARN_ON_ONCE(jinode->i_next_transaction ||
> +				 jinode->i_transaction !=
> +				 journal->j_committing_transaction)) {
> +			pr_err("JBD2: %s: assertion failure: i_next_transaction=%p i_transaction=%p j_committing_transaction=%p\n",
> +			       journal->j_devname, jinode->i_next_transaction,
> +			       jinode->i_transaction,
> +			       journal->j_committing_transaction);
> +			err = -EINVAL;
> +			abort_transaction = 1;
> +			goto done;
> +		}
>  		jinode->i_next_transaction = transaction;
>  		goto done;
>  	}
>  	/* Not on any transaction list... */
> -	J_ASSERT(!jinode->i_next_transaction);
> +	if (WARN_ON_ONCE(jinode->i_next_transaction)) {
> +		err = -EINVAL;
> +		abort_transaction = 1;
> +		goto done;
> +	}
>  	jinode->i_transaction = transaction;
>  	list_add(&jinode->i_list, &transaction->t_inode_list);
>  done:
>  	spin_unlock(&journal->j_list_lock);
> -
> -	return 0;
> +	if (abort_transaction)
> +		jbd2_journal_abort(journal, err);
> +	return err;
>  }
>  
>  int jbd2_journal_inode_ranged_write(handle_t *handle,
> -- 
> 2.53.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR
Re: [PATCH v4 2/2] jbd2: gracefully abort on transaction state corruptions
Posted by Andreas Dilger 1 month ago
On Mar 3, 2026, at 11:01, Milos Nikic <nikic.milos@gmail.com> wrote:
> 
> Auditing the jbd2 codebase reveals several legacy J_ASSERT calls
> that enforce internal state machine invariants (e.g., verifying
> jh->b_transaction or jh->b_next_transaction pointers).
> 
> When these invariants are broken, the journal is in a corrupted
> state. However, triggering a fatal panic brings down the entire
> system for a localized filesystem error.
> 
> This patch targets a specific class of these asserts: those
> residing inside functions that natively return integer error codes,
> booleans, or error pointers. It replaces the hard J_ASSERTs with
> WARN_ON_ONCE to capture the offending stack trace, safely drops
> any held locks, gracefully aborts the journal, and returns -EINVAL.
> 
> This prevents a catastrophic kernel panic while ensuring the
> corrupted journal state is safely contained and upstream callers
> (like ext4 or ocfs2) can gracefully handle the aborted handle.
> 
> Functions modified in fs/jbd2/transaction.c:
> - jbd2__journal_start()
> - do_get_write_access()
> - jbd2_journal_dirty_metadata()
> - jbd2_journal_forget()
> - jbd2_journal_try_to_free_buffers()
> - jbd2_journal_file_inode()
> 
> Signed-off-by: Milos Nikic <nikic.milos@gmail.com>

Looks good, with one minor suggestion.  Either way you could add:

Reviewed-by: Andreas Dilger <adilger@dilger.ca <mailto:adilger@dilger.ca>>

> @@ -1531,13 +1558,15 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
>  			spin_lock(&jh->b_state_lock);
>  			if (jh->b_transaction == transaction &&
>  			    jh->b_jlist != BJ_Metadata)
> -				pr_err("JBD2: assertion failure: h_type=%u "
> - 				       "h_line_no=%u block_no=%llu jlist=%u\n",
> +				pr_err("JBD2: assertion failure: h_type=%u h_line_no=%u block_no=%llu jlist=%u\n",
>  				       handle->h_type, handle->h_line_no,
>  				       (unsigned long long) bh->b_blocknr,
>  				       jh->b_jlist);
> -			J_ASSERT_JH(jh, jh->b_transaction != transaction ||
> -				    jh->b_jlist == BJ_Metadata);
> +			if (WARN_ON_ONCE(jh->b_transaction == transaction &&
> +					 jh->b_jlist != BJ_Metadata)) {
> +				ret = -EINVAL;
> +				goto out_unlock_bh;
> +			}
>  			spin_unlock(&jh->b_state_lock);
>  		}

It doesn't make sense to check these conditions twice.  That was needed with
the J_ASSERT_JH() calls, but it is now possible to put the pr_err() calls
inside "if (WARN_ON_ONCE(...))" as it is done in other parts of the patch.

Cheers, Andreas