[PATCH v2 09/10] ext4: move zero partial block range functions out of active handle

Zhang Yi posted 10 patches 1 week, 1 day ago
There is a newer version of this series
[PATCH v2 09/10] ext4: move zero partial block range functions out of active handle
Posted by Zhang Yi 1 week, 1 day ago
From: Zhang Yi <yi.zhang@huawei.com>

Move ext4_block_zero_eof() and ext4_zero_partial_blocks() calls out of
the active handle context, making them independent operations. This is
safe because it still ensures data is updated before metadata for
data=ordered mode and data=journal mode because we still zero data and
ordering data before modifying the metadata.

This change is required for iomap infrastructure conversion because the
iomap buffered I/O path does not use the same journal infrastructure for
partial block zeroing. The lock ordering of folio lock and starting
transactions is "folio lock -> transaction start", which is opposite of
the current path. Therefore, zeroing partial blocks cannot be performed
under the active handle.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
 fs/ext4/extents.c | 29 ++++++++++++-----------------
 fs/ext4/inode.c   | 36 ++++++++++++++++++------------------
 2 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 57a686b600d9..81b9d5b4ad71 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4585,6 +4585,10 @@ static int ext4_alloc_file_blocks(struct file *file, loff_t offset, loff_t len,
 	credits = ext4_chunk_trans_blocks(inode, len_lblk);
 	depth = ext_depth(inode);
 
+	/* Zero to the end of the block containing i_size */
+	if (new_size && offset > old_size)
+		ext4_block_zero_eof(inode, old_size, LLONG_MAX);
+
 retry:
 	while (len_lblk) {
 		/*
@@ -4623,10 +4627,8 @@ static int ext4_alloc_file_blocks(struct file *file, loff_t offset, loff_t len,
 			if (ext4_update_inode_size(inode, epos) & 0x1)
 				inode_set_mtime_to_ts(inode,
 						      inode_get_ctime(inode));
-			if (epos > old_size) {
+			if (epos > old_size)
 				pagecache_isize_extended(inode, old_size, epos);
-				ext4_block_zero_eof(inode, old_size, epos);
-			}
 		}
 		ret2 = ext4_mark_inode_dirty(handle, inode);
 		ext4_update_inode_fsync_trans(handle, inode, 1);
@@ -4668,7 +4670,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	loff_t align_start, align_end, new_size = 0;
 	loff_t end = offset + len;
 	unsigned int blocksize = i_blocksize(inode);
-	int ret, flags, credits;
+	int ret, flags;
 
 	trace_ext4_zero_range(inode, offset, len, mode);
 	WARN_ON_ONCE(!inode_is_locked(inode));
@@ -4722,25 +4724,18 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	if (IS_ALIGNED(offset | end, blocksize))
 		return ret;
 
-	/*
-	 * In worst case we have to writeout two nonadjacent unwritten
-	 * blocks and update the inode
-	 */
-	credits = (2 * ext4_ext_index_trans_blocks(inode, 2)) + 1;
-	if (ext4_should_journal_data(inode))
-		credits += 2;
-	handle = ext4_journal_start(inode, EXT4_HT_MISC, credits);
+	/* Zero out partial block at the edges of the range */
+	ret = ext4_zero_partial_blocks(inode, offset, len);
+	if (ret)
+		return ret;
+
+	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
 	if (IS_ERR(handle)) {
 		ret = PTR_ERR(handle);
 		ext4_std_error(inode->i_sb, ret);
 		return ret;
 	}
 
-	/* Zero out partial block at the edges of the range */
-	ret = ext4_zero_partial_blocks(inode, offset, len);
-	if (ret)
-		goto out_handle;
-
 	if (new_size)
 		ext4_update_inode_size(inode, new_size);
 	ret = ext4_mark_inode_dirty(handle, inode);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f68b2afdcfcb..530197a53208 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4442,8 +4442,12 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (ret)
 		return ret;
 
+	ret = ext4_zero_partial_blocks(inode, offset, length);
+	if (ret)
+		return ret;
+
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
-		credits = ext4_chunk_trans_extent(inode, 2);
+		credits = ext4_chunk_trans_extent(inode, 0);
 	else
 		credits = ext4_blocks_for_truncate(inode);
 	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
@@ -4453,10 +4457,6 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 		return ret;
 	}
 
-	ret = ext4_zero_partial_blocks(inode, offset, length);
-	if (ret)
-		goto out_handle;
-
 	/* If there are blocks to remove, do it */
 	start_lblk = EXT4_B_TO_LBLK(inode, offset);
 	end_lblk = end >> inode->i_blkbits;
@@ -4588,6 +4588,9 @@ int ext4_truncate(struct inode *inode)
 		err = ext4_inode_attach_jinode(inode);
 		if (err)
 			goto out_trace;
+
+		/* Zero to the end of the block containing i_size */
+		ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX);
 	}
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
@@ -4601,10 +4604,6 @@ int ext4_truncate(struct inode *inode)
 		goto out_trace;
 	}
 
-	/* Zero to the end of the block containing i_size */
-	if (inode->i_size & (inode->i_sb->s_blocksize - 1))
-		ext4_block_zero_eof(inode, inode->i_size, LLONG_MAX);
-
 	/*
 	 * We add the inode to the orphan list, so that if this
 	 * truncate spans multiple transactions, and we crash, we will
@@ -5962,15 +5961,6 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 					goto out_mmap_sem;
 			}
 
-			handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
-			if (IS_ERR(handle)) {
-				error = PTR_ERR(handle);
-				goto out_mmap_sem;
-			}
-			if (ext4_handle_valid(handle) && shrink) {
-				error = ext4_orphan_add(handle, inode);
-				orphan = 1;
-			}
 			/*
 			 * Update c/mtime and tail zero the EOF folio on
 			 * truncate up. ext4_truncate() handles the shrink case
@@ -5984,6 +5974,16 @@ int ext4_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
 							    LLONG_MAX);
 			}
 
+			handle = ext4_journal_start(inode, EXT4_HT_INODE, 3);
+			if (IS_ERR(handle)) {
+				error = PTR_ERR(handle);
+				goto out_mmap_sem;
+			}
+			if (ext4_handle_valid(handle) && shrink) {
+				error = ext4_orphan_add(handle, inode);
+				orphan = 1;
+			}
+
 			if (shrink)
 				ext4_fc_track_range(handle, inode,
 					(attr->ia_size > 0 ? attr->ia_size - 1 : 0) >>
-- 
2.52.0