[v3] ext4: fix insufficient credits when writing back large folios

[PATCH v3 04/10] ext4: refactor the block allocation process of ext4_page_mkwrite()

Posted by Zhang Yi 7 months, 1 week ago

From: Zhang Yi <yi.zhang@huawei.com>

The block allocation process and error handling in ext4_page_mkwrite()
are complex now. Refactor them by introducing a new helper function,
ext4_block_page_mkwrite(). It will call ext4_block_write_begin() to
allocate blocks instead of directly calling block_page_mkwrite().
This prepares for implementing retry logic in a subsequent patch to
address situations where the reserved journal credits are insufficient.
Additionally, this modification will help prevent potential deadlocks
that may occur when waiting for folio writeback while holding the
transaction handle.

Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/ext4/inode.c | 95 ++++++++++++++++++++++++++-----------------------
 1 file changed, 50 insertions(+), 45 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 62f1263d05da..31731a732df2 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -6605,6 +6605,53 @@ static int ext4_bh_unmapped(handle_t *handle, struct inode *inode,
 	return !buffer_mapped(bh);
 }
 
+static int ext4_block_page_mkwrite(struct inode *inode, struct folio *folio,
+				   get_block_t get_block)
+{
+	handle_t *handle;
+	loff_t size;
+	unsigned long len;
+	int ret;
+
+	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
+				    ext4_writepage_trans_blocks(inode));
+	if (IS_ERR(handle))
+		return PTR_ERR(handle);
+
+	folio_lock(folio);
+	size = i_size_read(inode);
+	/* Page got truncated from under us? */
+	if (folio->mapping != inode->i_mapping || folio_pos(folio) > size) {
+		ret = -EFAULT;
+		goto out_error;
+	}
+
+	len = folio_size(folio);
+	if (folio_pos(folio) + len > size)
+		len = size - folio_pos(folio);
+
+	ret = ext4_block_write_begin(handle, folio, 0, len, get_block);
+	if (ret)
+		goto out_error;
+
+	if (!ext4_should_journal_data(inode)) {
+		block_commit_write(folio, 0, len);
+		folio_mark_dirty(folio);
+	} else {
+		ret = ext4_journal_folio_buffers(handle, folio, len);
+		if (ret)
+			goto out_error;
+	}
+	ext4_journal_stop(handle);
+	folio_wait_stable(folio);
+	return ret;
+
+out_error:
+	folio_unlock(folio);
+	ext4_journal_stop(handle);
+	return ret;
+}
+
 vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
@@ -6616,8 +6663,7 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
 	struct file *file = vma->vm_file;
 	struct inode *inode = file_inode(file);
 	struct address_space *mapping = inode->i_mapping;
-	handle_t *handle;
-	get_block_t *get_block;
+	get_block_t *get_block = ext4_get_block;
 	int retries = 0;
 
 	if (unlikely(IS_IMMUTABLE(inode)))
@@ -6685,46 +6731,9 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
 	/* OK, we need to fill the hole... */
 	if (ext4_should_dioread_nolock(inode))
 		get_block = ext4_get_block_unwritten;
-	else
-		get_block = ext4_get_block;
 retry_alloc:
-	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
-				    ext4_writepage_trans_blocks(inode));
-	if (IS_ERR(handle)) {
-		ret = VM_FAULT_SIGBUS;
-		goto out;
-	}
-	/*
-	 * Data journalling can't use block_page_mkwrite() because it
-	 * will set_buffer_dirty() before do_journal_get_write_access()
-	 * thus might hit warning messages for dirty metadata buffers.
-	 */
-	if (!ext4_should_journal_data(inode)) {
-		err = block_page_mkwrite(vma, vmf, get_block);
-	} else {
-		folio_lock(folio);
-		size = i_size_read(inode);
-		/* Page got truncated from under us? */
-		if (folio->mapping != mapping || folio_pos(folio) > size) {
-			ret = VM_FAULT_NOPAGE;
-			goto out_error;
-		}
-
-		len = folio_size(folio);
-		if (folio_pos(folio) + len > size)
-			len = size - folio_pos(folio);
-
-		err = ext4_block_write_begin(handle, folio, 0, len,
-					     ext4_get_block);
-		if (!err) {
-			ret = VM_FAULT_SIGBUS;
-			if (ext4_journal_folio_buffers(handle, folio, len))
-				goto out_error;
-		} else {
-			folio_unlock(folio);
-		}
-	}
-	ext4_journal_stop(handle);
+	/* Start journal and allocate blocks */
+	err = ext4_block_page_mkwrite(inode, folio, get_block);
 	if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
 		goto retry_alloc;
 out_ret:
@@ -6733,8 +6742,4 @@ vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf)
 	filemap_invalidate_unlock_shared(mapping);
 	sb_end_pagefault(inode->i_sb);
 	return ret;
-out_error:
-	folio_unlock(folio);
-	ext4_journal_stop(handle);
-	goto out;
 }
-- 
2.46.1

Re: [PATCH v3 04/10] ext4: refactor the block allocation process of ext4_page_mkwrite()

Posted by Jan Kara 7 months, 1 week ago

On Tue 01-07-25 21:06:29, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
> 
> The block allocation process and error handling in ext4_page_mkwrite()
> is complex now. Refactor it by introducing a new helper function,
> ext4_block_page_mkwrite(). It will call ext4_block_write_begin() to
> allocate blocks instead of directly calling block_page_mkwrite().
> Preparing to implement retry logic in a subsequent patch to address
> situations where the reserved journal credits are insufficient.
> Additionally, this modification will help prevent potential deadlocks
> that may occur when waiting for folio writeback while holding the
> transaction handle.
> 
> Suggested-by: Jan Kara <jack@suse.cz>
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>

Looks good! Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

One typo fix below:

> +	/* Start jorunal and allocate blocks */
		 ^^^ journal

> +	err = ext4_block_page_mkwrite(inode, folio, get_block);
>  	if (err == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
>  		goto retry_alloc;
>  out_ret:

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR