[PATCH v4 09/13] ext4: ensure zeroed partial blocks are persisted in SYNC mode

Zhang Yi posted 13 patches 6 days, 6 hours ago
[PATCH v4 09/13] ext4: ensure zeroed partial blocks are persisted in SYNC mode
Posted by Zhang Yi 6 days, 6 hours ago
From: Zhang Yi <yi.zhang@huawei.com>

In ext4_zero_range() and ext4_punch_hole(), when operating in SYNC mode
and zeroing a partial block, only data=journal modes guarantee that the
zeroed data is synchronously persisted after the operation completes.
For data=ordered/writeback mode and non-journal modes, this guarantee is
missing.

Introduce a partial_zero parameter to explicitly trigger writeback for
all scenarios where a partial block is zeroed, ensuring the zeroed data
is durably persisted.

Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
---
 fs/ext4/ext4.h    |  2 +-
 fs/ext4/extents.c |  9 ++++++++-
 fs/ext4/inode.c   | 19 ++++++++++++++-----
 3 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 859ae05339ad..bfe86479a83c 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3101,7 +3101,7 @@ extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
 				  int pextents);
 extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
 extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
-				    loff_t length);
+				    loff_t length, bool *did_zero);
 extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
 extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 002b1ec8cee2..16386f499138 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4673,6 +4673,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 	loff_t align_start, align_end, new_size = 0;
 	loff_t end = offset + len;
 	unsigned int blocksize = i_blocksize(inode);
+	bool partial_zeroed = false;
 	int ret, flags;
 
 	trace_ext4_zero_range(inode, offset, len, mode);
@@ -4728,9 +4729,15 @@ static long ext4_zero_range(struct file *file, loff_t offset,
 		return ret;
 
 	/* Zero out partial block at the edges of the range */
-	ret = ext4_zero_partial_blocks(inode, offset, len);
+	ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
 	if (ret)
 		return ret;
+	if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
+		ret = filemap_write_and_wait_range(inode->i_mapping, offset,
+						   end - 1);
+		if (ret)
+			return ret;
+	}
 
 	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
 	if (IS_ERR(handle)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 8aa4369e3150..b934ad86a96d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -4227,7 +4227,8 @@ int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
 	return 0;
 }
 
-int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
+int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
+			     bool *did_zero)
 {
 	struct super_block *sb = inode->i_sb;
 	unsigned partial_start, partial_end;
@@ -4244,20 +4245,21 @@ int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
 	/* Handle partial zero within the single block */
 	if (start == end &&
 	    (partial_start || (partial_end != sb->s_blocksize - 1))) {
-		err = ext4_block_zero_range(inode, lstart, length, NULL, NULL);
+		err = ext4_block_zero_range(inode, lstart, length, did_zero,
+					    NULL);
 		return err;
 	}
 	/* Handle partial zero out on the start of the range */
 	if (partial_start) {
 		err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
-					    NULL, NULL);
+					    did_zero, NULL);
 		if (err)
 			return err;
 	}
 	/* Handle partial zero out on the end of the range */
 	if (partial_end != sb->s_blocksize - 1)
 		err = ext4_block_zero_range(inode, byte_end - partial_end,
-					    partial_end + 1, NULL, NULL);
+					    partial_end + 1, did_zero, NULL);
 	return err;
 }
 
@@ -4406,6 +4408,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	loff_t end = offset + length;
 	handle_t *handle;
 	unsigned int credits;
+	bool partial_zeroed = false;
 	int ret;
 
 	trace_ext4_punch_hole(inode, offset, length, 0);
@@ -4441,9 +4444,15 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
 	if (ret)
 		return ret;
 
-	ret = ext4_zero_partial_blocks(inode, offset, length);
+	ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
 	if (ret)
 		return ret;
+	if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
+		ret = filemap_write_and_wait_range(inode->i_mapping, offset,
+						   end - 1);
+		if (ret)
+			return ret;
+	}
 
 	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
 		credits = ext4_chunk_trans_extent(inode, 0);
-- 
2.52.0
Re: [PATCH v4 09/13] ext4: ensure zeroed partial blocks are persisted in SYNC mode
Posted by Jan Kara 1 day ago
On Fri 27-03-26 18:29:35, Zhang Yi wrote:
> From: Zhang Yi <yi.zhang@huawei.com>
> 
> In ext4_zero_range() and ext4_punch_hole(), when operating in SYNC mode
> and zeroing a partial block, only data=journal modes guarantee that the
> zeroed data is synchronously persisted after the operation completes.
> For data=ordered/writeback mode and non-journal modes, this guarantee is
> missing.
> 
> Introduce a partial_zero parameter to explicitly trigger writeback for
> all scenarios where a partial block is zeroed, ensuring the zeroed data
> is durably persisted.
> 
> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
> ---
>  fs/ext4/ext4.h    |  2 +-
>  fs/ext4/extents.c |  9 ++++++++-
>  fs/ext4/inode.c   | 19 ++++++++++++++-----
>  3 files changed, 23 insertions(+), 7 deletions(-)
> 
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index 859ae05339ad..bfe86479a83c 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -3101,7 +3101,7 @@ extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
>  				  int pextents);
>  extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
>  extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
> -				    loff_t length);
> +				    loff_t length, bool *did_zero);
>  extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
>  extern qsize_t *ext4_get_reserved_space(struct inode *inode);
>  extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
> index 002b1ec8cee2..16386f499138 100644
> --- a/fs/ext4/extents.c
> +++ b/fs/ext4/extents.c
> @@ -4673,6 +4673,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
>  	loff_t align_start, align_end, new_size = 0;
>  	loff_t end = offset + len;
>  	unsigned int blocksize = i_blocksize(inode);
> +	bool partial_zeroed = false;
>  	int ret, flags;
>  
>  	trace_ext4_zero_range(inode, offset, len, mode);
> @@ -4728,9 +4729,15 @@ static long ext4_zero_range(struct file *file, loff_t offset,
>  		return ret;
>  
>  	/* Zero out partial block at the edges of the range */
> -	ret = ext4_zero_partial_blocks(inode, offset, len);
> +	ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
>  	if (ret)
>  		return ret;
> +	if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
> +		ret = filemap_write_and_wait_range(inode->i_mapping, offset,
> +						   end - 1);
> +		if (ret)
> +			return ret;
> +	}

Shouldn't we handle this somewhat below, where we do:

        if (file->f_flags & O_SYNC)
                ext4_handle_sync(handle);

It would be logical to keep these together... Similarly for
ext4_punch_hole() below. An yes, the check when calling ext4_handle_sync()
should be extended with an IS_SYNC() check, which is a preexisting bug.

								Honza

>  
>  	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
>  	if (IS_ERR(handle)) {
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 8aa4369e3150..b934ad86a96d 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4227,7 +4227,8 @@ int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
>  	return 0;
>  }
>  
> -int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
> +int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
> +			     bool *did_zero)
>  {
>  	struct super_block *sb = inode->i_sb;
>  	unsigned partial_start, partial_end;
> @@ -4244,20 +4245,21 @@ int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
>  	/* Handle partial zero within the single block */
>  	if (start == end &&
>  	    (partial_start || (partial_end != sb->s_blocksize - 1))) {
> -		err = ext4_block_zero_range(inode, lstart, length, NULL, NULL);
> +		err = ext4_block_zero_range(inode, lstart, length, did_zero,
> +					    NULL);
>  		return err;
>  	}
>  	/* Handle partial zero out on the start of the range */
>  	if (partial_start) {
>  		err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
> -					    NULL, NULL);
> +					    did_zero, NULL);
>  		if (err)
>  			return err;
>  	}
>  	/* Handle partial zero out on the end of the range */
>  	if (partial_end != sb->s_blocksize - 1)
>  		err = ext4_block_zero_range(inode, byte_end - partial_end,
> -					    partial_end + 1, NULL, NULL);
> +					    partial_end + 1, did_zero, NULL);
>  	return err;
>  }
>  
> @@ -4406,6 +4408,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
>  	loff_t end = offset + length;
>  	handle_t *handle;
>  	unsigned int credits;
> +	bool partial_zeroed = false;
>  	int ret;
>  
>  	trace_ext4_punch_hole(inode, offset, length, 0);
> @@ -4441,9 +4444,15 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
>  	if (ret)
>  		return ret;
>  
> -	ret = ext4_zero_partial_blocks(inode, offset, length);
> +	ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
>  	if (ret)
>  		return ret;
> +	if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
> +		ret = filemap_write_and_wait_range(inode->i_mapping, offset,
> +						   end - 1);
> +		if (ret)
> +			return ret;
> +	}
>  
>  	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
>  		credits = ext4_chunk_trans_extent(inode, 0);
> -- 
> 2.52.0
> 
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR
Re: [PATCH v4 09/13] ext4: ensure zeroed partial blocks are persisted in SYNC mode
Posted by Zhang Yi 16 hours ago
On 4/2/2026 1:06 AM, Jan Kara wrote:
> On Fri 27-03-26 18:29:35, Zhang Yi wrote:
>> From: Zhang Yi <yi.zhang@huawei.com>
>>
>> In ext4_zero_range() and ext4_punch_hole(), when operating in SYNC mode
>> and zeroing a partial block, only data=journal modes guarantee that the
>> zeroed data is synchronously persisted after the operation completes.
>> For data=ordered/writeback mode and non-journal modes, this guarantee is
>> missing.
>>
>> Introduce a partial_zero parameter to explicitly trigger writeback for
>> all scenarios where a partial block is zeroed, ensuring the zeroed data
>> is durably persisted.
>>
>> Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
>> ---
>>  fs/ext4/ext4.h    |  2 +-
>>  fs/ext4/extents.c |  9 ++++++++-
>>  fs/ext4/inode.c   | 19 ++++++++++++++-----
>>  3 files changed, 23 insertions(+), 7 deletions(-)
>>
>> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
>> index 859ae05339ad..bfe86479a83c 100644
>> --- a/fs/ext4/ext4.h
>> +++ b/fs/ext4/ext4.h
>> @@ -3101,7 +3101,7 @@ extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
>>  				  int pextents);
>>  extern int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end);
>>  extern int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart,
>> -				    loff_t length);
>> +				    loff_t length, bool *did_zero);
>>  extern vm_fault_t ext4_page_mkwrite(struct vm_fault *vmf);
>>  extern qsize_t *ext4_get_reserved_space(struct inode *inode);
>>  extern int ext4_get_projid(struct inode *inode, kprojid_t *projid);
>> diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
>> index 002b1ec8cee2..16386f499138 100644
>> --- a/fs/ext4/extents.c
>> +++ b/fs/ext4/extents.c
>> @@ -4673,6 +4673,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
>>  	loff_t align_start, align_end, new_size = 0;
>>  	loff_t end = offset + len;
>>  	unsigned int blocksize = i_blocksize(inode);
>> +	bool partial_zeroed = false;
>>  	int ret, flags;
>>  
>>  	trace_ext4_zero_range(inode, offset, len, mode);
>> @@ -4728,9 +4729,15 @@ static long ext4_zero_range(struct file *file, loff_t offset,
>>  		return ret;
>>  
>>  	/* Zero out partial block at the edges of the range */
>> -	ret = ext4_zero_partial_blocks(inode, offset, len);
>> +	ret = ext4_zero_partial_blocks(inode, offset, len, &partial_zeroed);
>>  	if (ret)
>>  		return ret;
>> +	if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
>> +		ret = filemap_write_and_wait_range(inode->i_mapping, offset,
>> +						   end - 1);
>> +		if (ret)
>> +			return ret;
>> +	}
> 
> Shouldn't we handle this somewhat below, where we do:
> 
>         if (file->f_flags & O_SYNC)
>                 ext4_handle_sync(handle);
> 
> It would be logical to keep these together... Similarly for
> ext4_punch_hole() below. An yes, the check when calling ext4_handle_sync()
> should be extended with an IS_SYNC() check, which is a preexisting bug.
> 
> 								Honza

Yeah, right, I've done this in the next patch. Thank you for your review!

Cheers,
Yi.

> 
>>  
>>  	handle = ext4_journal_start(inode, EXT4_HT_MISC, 1);
>>  	if (IS_ERR(handle)) {
>> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
>> index 8aa4369e3150..b934ad86a96d 100644
>> --- a/fs/ext4/inode.c
>> +++ b/fs/ext4/inode.c
>> @@ -4227,7 +4227,8 @@ int ext4_block_zero_eof(struct inode *inode, loff_t from, loff_t end)
>>  	return 0;
>>  }
>>  
>> -int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
>> +int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length,
>> +			     bool *did_zero)
>>  {
>>  	struct super_block *sb = inode->i_sb;
>>  	unsigned partial_start, partial_end;
>> @@ -4244,20 +4245,21 @@ int ext4_zero_partial_blocks(struct inode *inode, loff_t lstart, loff_t length)
>>  	/* Handle partial zero within the single block */
>>  	if (start == end &&
>>  	    (partial_start || (partial_end != sb->s_blocksize - 1))) {
>> -		err = ext4_block_zero_range(inode, lstart, length, NULL, NULL);
>> +		err = ext4_block_zero_range(inode, lstart, length, did_zero,
>> +					    NULL);
>>  		return err;
>>  	}
>>  	/* Handle partial zero out on the start of the range */
>>  	if (partial_start) {
>>  		err = ext4_block_zero_range(inode, lstart, sb->s_blocksize,
>> -					    NULL, NULL);
>> +					    did_zero, NULL);
>>  		if (err)
>>  			return err;
>>  	}
>>  	/* Handle partial zero out on the end of the range */
>>  	if (partial_end != sb->s_blocksize - 1)
>>  		err = ext4_block_zero_range(inode, byte_end - partial_end,
>> -					    partial_end + 1, NULL, NULL);
>> +					    partial_end + 1, did_zero, NULL);
>>  	return err;
>>  }
>>  
>> @@ -4406,6 +4408,7 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
>>  	loff_t end = offset + length;
>>  	handle_t *handle;
>>  	unsigned int credits;
>> +	bool partial_zeroed = false;
>>  	int ret;
>>  
>>  	trace_ext4_punch_hole(inode, offset, length, 0);
>> @@ -4441,9 +4444,15 @@ int ext4_punch_hole(struct file *file, loff_t offset, loff_t length)
>>  	if (ret)
>>  		return ret;
>>  
>> -	ret = ext4_zero_partial_blocks(inode, offset, length);
>> +	ret = ext4_zero_partial_blocks(inode, offset, length, &partial_zeroed);
>>  	if (ret)
>>  		return ret;
>> +	if (((file->f_flags & O_SYNC) || IS_SYNC(inode)) && partial_zeroed) {
>> +		ret = filemap_write_and_wait_range(inode->i_mapping, offset,
>> +						   end - 1);
>> +		if (ret)
>> +			return ret;
>> +	}
>>  
>>  	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
>>  		credits = ext4_chunk_trans_extent(inode, 0);
>> -- 
>> 2.52.0
>>